1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * instead use a condition-variable.  Note that pipe->flush() is not expected
 * to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;
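/* flush_cnd is broadcast by flush_submit_list() after it updates
 * msm_pipe->last_submit_fence; msm_pipe_sp_flush() waits on it to ensure
 * deferred submits have actually reached the kernel.
 */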


struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
    * the same underlying bo)..
    *
    * We also rely on the previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;

   /* Flush args, potentially attached to the last submit in the list
    * of submits to merge:
    */
   int in_fence_fd;
   struct fd_submit_fence *out_fence;

   /* State for enqueued submits:
    */
   struct list_head submit_list;   /* includes this submit as last element */

   /* Used in case out_fence==NULL: */
   struct util_queue_fence fence;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   union {
      /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
       * this one
       */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct fd_bo *, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   idx = READ_ONCE(msm_bo->idx);

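   /* The cached idx may be stale or belong to a different submit, so only
    * trust it if it points back at this bo in our table; otherwise fall
    * back to the hash-table lookup / append slow-path:
    */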
   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      msm_bo->idx = idx;
   }

   return idx;
}

static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
                            struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
         to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(msm_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

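   /* Either start a new suballocation bo, or continue packing this stateobj
    * into the tail of the current one:
    */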
   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                             enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   return &msm_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
msm_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                         struct fd_submit_fence *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct msm_ringbuffer_sp *primary =
      to_msm_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      msm_submit_append_bo(msm_submit, primary->u.cmds[i].ring_bo);

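   /* Record the submit fence on every bo in the table; also note whether any
    * bo is shared (exported), since other processes may rely on implicit
    * sync, in which case the flush path will not defer this submit:
    */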
   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
      fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= msm_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   msm_submit->out_fence = out_fence;
   msm_submit->in_fence_fd = (in_fence_fd == -1) ?
      -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static int
flush_submit_list(struct list_head *submit_list)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(last_submit(submit_list));
   struct msm_pipe *msm_pipe = to_msm_pipe(msm_submit->base.pipe);
   struct drm_msm_gem_submit req = {
      .flags = msm_pipe->pipe,
      .queueid = msm_pipe->queue_id,
   };
   int ret;

   unsigned nr_cmds = 0;

   /* Determine the number of extra cmds from deferred submits that
    * we will be merging in:
    */
   foreach_submit (submit, submit_list) {
      assert(submit->pipe == &msm_pipe->base);
      nr_cmds += to_msm_ringbuffer_sp(submit->primary)->u.nr_cmds;
   }

   struct drm_msm_gem_submit_cmd cmds[nr_cmds];

   unsigned cmd_idx = 0;

   /* Build up the table of cmds, and for all but the last submit in the
    * list, merge their bo tables into the last submit.
    */
   foreach_submit_safe (submit, submit_list) {
      struct msm_ringbuffer_sp *deferred_primary =
         to_msm_ringbuffer_sp(submit->primary);

      for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
         cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[cmd_idx].submit_idx =
            msm_submit_append_bo(msm_submit, deferred_primary->u.cmds[i].ring_bo);
         cmds[cmd_idx].submit_offset = deferred_primary->offset;
         cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;

         cmd_idx++;
      }

      /* We are merging all the submits in the list into the last submit,
       * so the remainder of the loop body doesn't apply to the last submit
       */
      if (submit == last_submit(submit_list)) {
         DEBUG_MSG("merged %u submits", cmd_idx);
         break;
      }

      struct msm_submit_sp *msm_deferred_submit = to_msm_submit_sp(submit);
      for (unsigned i = 0; i < msm_deferred_submit->nr_bos; i++) {
         /* Note: if bo is used in both the current submit and the deferred
          * submit being merged, we expect to hit the fast-path as we add it
          * to the current submit:
          */
         msm_submit_append_bo(msm_submit, msm_deferred_submit->bos[i]);
      }

      /* Now that the cmds/bos have been transferred over to the current submit,
       * we can remove the deferred submit from the list and drop its reference
       */
      list_del(&submit->node);
      fd_submit_del(submit);
   }

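   /* An explicit in-fence implies the driver above us is doing explicit
    * synchronization, so also (stickily) disable implicit sync for this
    * pipe from here on:
    */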
   if (msm_submit->in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN;
      req.fence_fd = msm_submit->in_fence_fd;
      msm_pipe->no_implicit_sync = true;
   }

   if (msm_pipe->no_implicit_sync) {
      req.flags |= MSM_SUBMIT_NO_IMPLICIT;
   }

   if (msm_submit->out_fence && msm_submit->out_fence->use_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* Needs to be after get_cmd() as that could create bos/cmds table:
    *
    * NOTE allocate on-stack in the common case, but with an upper-
    * bound to limit on-stack allocation to 4k:
    */
   const unsigned bo_limit = 4096 / sizeof(struct drm_msm_gem_submit_bo);
   bool bos_on_stack = msm_submit->nr_bos < bo_limit;
   struct drm_msm_gem_submit_bo
      _submit_bos[bos_on_stack ? msm_submit->nr_bos : 0];
   struct drm_msm_gem_submit_bo *submit_bos;
   if (bos_on_stack) {
      submit_bos = _submit_bos;
   } else {
      submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0]));
   }

   for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
      submit_bos[i].flags = msm_submit->bos[i]->reloc_flags;
      submit_bos[i].handle = msm_submit->bos[i]->handle;
      submit_bos[i].presumed = 0;
   }

   req.bos = VOID2U64(submit_bos);
   req.nr_bos = msm_submit->nr_bos;
   req.cmds = VOID2U64(cmds);
   req.nr_cmds = nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(msm_pipe->base.dev->fd, DRM_MSM_GEM_SUBMIT, &req,
                             sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else if (!ret && msm_submit->out_fence) {
      msm_submit->out_fence->fence.kfence = req.fence;
      msm_submit->out_fence->fence.ufence = msm_submit->base.fence;
      msm_submit->out_fence->fence_fd = req.fence_fd;
   }

   if (!bos_on_stack)
      free(submit_bos);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(msm_pipe->last_submit_fence, msm_submit->base.fence));
   msm_pipe->last_submit_fence = msm_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   if (msm_submit->in_fence_fd != -1)
      close(msm_submit->in_fence_fd);

   return ret;
}

static void
msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   flush_submit_list(&msm_submit->submit_list);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_device *msm_dev = to_msm_device(submit->pipe->dev);

   list_replace(submit_list, &msm_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (msm_submit->out_fence) {
      fence = &msm_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&msm_submit->fence);
      fence = &msm_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&msm_dev->submit_queue,
                      submit, fence,
                      msm_submit_sp_flush_execute,
                      msm_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (msm_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueue's (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

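   /* Always append this submit to the deferred list; if we decide below not
    * to defer, the whole list (including this submit) gets flushed together:
    */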
   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(msm_pipe->last_enqueue_fence, submit->fence));
   msm_pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

void
msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, msm_pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(msm_pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = msm_submit_sp_new_ringbuffer,
   .flush = msm_submit_sp_flush,
   .destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;

   msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);

   slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

   submit = &msm_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
   // TODO tune size:
   slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp),
                      16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
   if (msm_pipe->ring_pool.num_elements)
      slab_destroy_parent(&msm_pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

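   /* Record the current write position as a finalized cmd entry (size is
    * measured from the ring bo's mapped start to the current write ptr);
    * the grow path then continues the cmdstream in a fresh bo.
    */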
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   APPEND(&msm_ring->u, cmds,
          (struct msm_cmd_sp){
             .ring_bo = fd_bo_ref(msm_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
msm_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
      if (msm_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

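/* msm_ringbuffer_sp.h is included twice to stamp out 32-bit and 64-bit
 * pointer-size variants of the emit_reloc/emit_reloc_ring helpers; which
 * variant to use is selected at ring init time based on fd_dev_64b().
 */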
#define PTRSZ 64
#include "msm_ringbuffer_sp.h"
#undef PTRSZ
#define PTRSZ 32
#include "msm_ringbuffer_sp.h"
#undef PTRSZ

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
msm_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_submit *submit = msm_ring->u.submit;

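   /* Once the submit's bo table gets uncomfortably large, report failure so
    * the caller knows it is time to flush (leaving headroom below
    * MAX_ARRAY_SIZE):
    */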
   if (to_msm_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i]);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&msm_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&msm_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   /* Lock access to the msm_pipe->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   static simple_mtx_t suballoc_lock = _SIMPLE_MTX_INITIALIZER_NP;
   simple_mtx_lock(&suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords */
   msm_ring->offset = align(msm_pipe->suballoc_offset, 64);
   if (!msm_pipe->suballoc_bo ||
       msm_ring->offset + size > fd_bo_size(msm_pipe->suballoc_bo)) {
      if (msm_pipe->suballoc_bo)
         fd_bo_del(msm_pipe->suballoc_bo);
      msm_pipe->suballoc_bo =
         fd_bo_new_ring(pipe->dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      msm_ring->offset = 0;
   }

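   /* Each ringbuffer object takes its own reference on the suballoc bo, so
    * dropping the pipe's reference above does not invalidate objects already
    * carved out of the old bo:
    */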
   msm_ring->u.pipe = pipe;
   msm_ring->ring_bo = fd_bo_ref(msm_pipe->suballoc_bo);
   msm_ring->base.refcnt = 1;

   msm_pipe->suballoc_offset = msm_ring->offset + size;

   simple_mtx_unlock(&suballoc_lock);

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}
