/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */
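
/* With softpin, GPU addresses are emitted directly into the cmdstream by the
 * upper layers, so (assuming the usual softpin model) the kernel only needs
 * the bos table for residency and fence tracking; no reloc patching is
 * required at submit time (note nr_relocs is always zero below).
 */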

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * so use a condition-variable instead.  Note that pipe->flush() is not
 * expected to be a common/hot path.
 */
static pthread_cond_t  flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;
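
/* Hand-off between the flush path and the submit queue: flush_submit_list()
 * (running on the submit-queue thread) advances msm_pipe->last_submit_fence
 * and broadcasts flush_cnd, while msm_pipe_sp_flush() waits on the condition
 * until the fence it needs has actually been submitted to the kernel.
 */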


struct msm_submit_sp {
   struct fd_submit base;

   DECLARE_ARRAY(struct fd_bo *, bos);

   /* maps fd_bo to idx in bos table: */
   struct hash_table *bo_table;

   struct slab_child_pool ring_pool;

   /* Allow for sub-allocation of stateobj ring buffers (ie. sharing
    * the same underlying bo)..
    *
    * We also rely on the previous stateobj having been fully constructed
    * so we can reclaim extra space at its end.
    */
   struct fd_ringbuffer *suballoc_ring;

   /* Flush args, potentially attached to the last submit in the list
    * of submits to merge:
    */
   int in_fence_fd;
   struct fd_submit_fence *out_fence;

   /* State for enqueued submits:
    */
   struct list_head submit_list;   /* includes this submit as last element */

   /* Used in case out_fence==NULL: */
   struct util_queue_fence fence;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);

/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
 * and sizes.  Ie. a finalized buffer can have no more commands appended to
 * it.
 */
struct msm_cmd_sp {
   struct fd_bo *ring_bo;
   unsigned size;
};

struct msm_ringbuffer_sp {
   struct fd_ringbuffer base;

   /* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
   unsigned offset;

   union {
      /* for _FD_RINGBUFFER_OBJECT case, the array of BOs referenced from
       * this one
       */
      struct {
         struct fd_pipe *pipe;
         DECLARE_ARRAY(struct fd_bo *, reloc_bos);
      };
      /* for other cases: */
      struct {
         struct fd_submit *submit;
         DECLARE_ARRAY(struct msm_cmd_sp, cmds);
      };
   } u;

   struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
static uint32_t
msm_submit_append_bo(struct msm_submit_sp *submit, struct fd_bo *bo)
{
   struct msm_bo *msm_bo = to_msm_bo(bo);
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
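   /* msm_bo->idx is an unsynchronized hint: the index this bo had in the last
    * submit it was added to (possibly a different submit, so possibly stale).
    * It is validated against this submit's bos table before being trusted;
    * otherwise we fall back to the hash-table lookup below.
    */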
   idx = READ_ONCE(msm_bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      msm_bo->idx = idx;
   }

   return idx;
}

static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
                            struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (msm_submit->suballoc_ring) {
      struct msm_ringbuffer_sp *suballoc_ring =
         to_msm_ringbuffer_sp(msm_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(msm_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      msm_ring->offset = 0;
   } else {
      msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
      msm_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;

   msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                             enum fd_ringbuffer_flags flags)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_ringbuffer_sp *msm_ring;

   msm_ring = slab_alloc(&msm_submit->ring_pool);

   msm_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   msm_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      msm_submit_suballoc_ring_bo(submit, msm_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      msm_ring->offset = 0;
      msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
      return NULL;

   return &msm_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize the primary ringbuffer; at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
msm_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                         struct fd_submit_fence *out_fence)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct msm_ringbuffer_sp *primary =
      to_msm_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      msm_submit_append_bo(msm_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
      fd_bo_add_fence(msm_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= msm_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   msm_submit->out_fence   = out_fence;
   msm_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static int
flush_submit_list(struct list_head *submit_list)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(last_submit(submit_list));
   struct msm_pipe *msm_pipe = to_msm_pipe(msm_submit->base.pipe);
   struct drm_msm_gem_submit req = {
      .flags = msm_pipe->pipe,
      .queueid = msm_pipe->queue_id,
   };
   int ret;

   unsigned nr_cmds = 0;

   /* Determine the number of extra cmds from deferred submits that
    * we will be merging in:
    */
   foreach_submit (submit, submit_list) {
      assert(submit->pipe == &msm_pipe->base);
      nr_cmds += to_msm_ringbuffer_sp(submit->primary)->u.nr_cmds;
   }

   struct drm_msm_gem_submit_cmd cmds[nr_cmds];

   unsigned cmd_idx = 0;

   /* Build up the table of cmds, and for all but the last submit in the
    * list, merge their bo tables into the last submit.
    */
   foreach_submit_safe (submit, submit_list) {
      struct msm_ringbuffer_sp *deferred_primary =
         to_msm_ringbuffer_sp(submit->primary);

      for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
         cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[cmd_idx].submit_idx =
               msm_submit_append_bo(msm_submit, deferred_primary->u.cmds[i].ring_bo);
         cmds[cmd_idx].submit_offset = deferred_primary->offset;
         cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;

         cmd_idx++;
      }

      /* We are merging all the submits in the list into the last submit,
       * so the remainder of the loop body doesn't apply to the last submit
       */
      if (submit == last_submit(submit_list)) {
         DEBUG_MSG("merged %u submits", cmd_idx);
         break;
      }

      struct msm_submit_sp *msm_deferred_submit = to_msm_submit_sp(submit);
      for (unsigned i = 0; i < msm_deferred_submit->nr_bos; i++) {
         /* Note: if bo is used in both the current submit and the deferred
          * submit being merged, we expect to hit the fast-path as we add it
          * to the current submit:
          */
         msm_submit_append_bo(msm_submit, msm_deferred_submit->bos[i]);
      }

      /* Now that the cmds/bos have been transferred over to the current submit,
       * we can remove the deferred submit from the list and drop its reference
       */
      list_del(&submit->node);
      fd_submit_del(submit);
   }

   if (msm_submit->in_fence_fd != -1) {
      req.flags |= MSM_SUBMIT_FENCE_FD_IN;
      req.fence_fd = msm_submit->in_fence_fd;
      msm_pipe->no_implicit_sync = true;
   }

   if (msm_pipe->no_implicit_sync) {
      req.flags |= MSM_SUBMIT_NO_IMPLICIT;
   }

   if (msm_submit->out_fence && msm_submit->out_fence->use_fence_fd) {
      req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
   }

   /* Needs to be after get_cmd() as that could create bos/cmds table:
    *
    * NOTE allocate on-stack in the common case, but with an upper-
    * bound to limit on-stack allocation to 4k:
    */
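   /* For example, assuming the current 16-byte struct drm_msm_gem_submit_bo
    * (two u32s plus a u64), this allows up to 4096/16 = 256 bos on the stack.
    */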
   const unsigned bo_limit = 4096 / sizeof(struct drm_msm_gem_submit_bo);
   bool bos_on_stack = msm_submit->nr_bos < bo_limit;
   struct drm_msm_gem_submit_bo
      _submit_bos[bos_on_stack ? msm_submit->nr_bos : 0];
   struct drm_msm_gem_submit_bo *submit_bos;
   if (bos_on_stack) {
      submit_bos = _submit_bos;
   } else {
      submit_bos = malloc(msm_submit->nr_bos * sizeof(submit_bos[0]));
   }

   for (unsigned i = 0; i < msm_submit->nr_bos; i++) {
      submit_bos[i].flags = msm_submit->bos[i]->reloc_flags;
      submit_bos[i].handle = msm_submit->bos[i]->handle;
      submit_bos[i].presumed = 0;
   }

   req.bos = VOID2U64(submit_bos);
   req.nr_bos = msm_submit->nr_bos;
   req.cmds = VOID2U64(cmds);
   req.nr_cmds = nr_cmds;

   DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);

   ret = drmCommandWriteRead(msm_pipe->base.dev->fd, DRM_MSM_GEM_SUBMIT, &req,
                             sizeof(req));
   if (ret) {
      ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
      msm_dump_submit(&req);
   } else if (!ret && msm_submit->out_fence) {
      msm_submit->out_fence->fence.kfence = req.fence;
      msm_submit->out_fence->fence.ufence = msm_submit->base.fence;
      msm_submit->out_fence->fence_fd = req.fence_fd;
   }

   if (!bos_on_stack)
      free(submit_bos);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(msm_pipe->last_submit_fence, msm_submit->base.fence));
   msm_pipe->last_submit_fence = msm_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   if (msm_submit->in_fence_fd != -1)
      close(msm_submit->in_fence_fd);

   return ret;
}

static void
msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   flush_submit_list(&msm_submit->submit_list);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
   struct msm_device *msm_dev = to_msm_device(submit->pipe->dev);

   list_replace(submit_list, &msm_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (msm_submit->out_fence) {
      fence = &msm_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&msm_submit->fence);
      fence = &msm_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

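   /* util_queue will signal 'fence' once the flush job below has executed on
    * the submit-queue thread.
    */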
   util_queue_add_job(&msm_dev->submit_queue,
                      submit, fence,
                      msm_submit_sp_flush_execute,
                      msm_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (msm_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
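   /* The 128 threshold keeps the cmd count of a merged submit comfortably
    * below that kernel-side limit (presumably leaving headroom for other
    * in-flight submits as well).
    */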
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(msm_pipe->last_enqueue_fence, submit->fence));
   msm_pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

void
msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, msm_pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that the submit queue has caught up and
    * flushed them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(msm_pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);

   if (msm_submit->suballoc_ring)
      fd_ringbuffer_del(msm_submit->suballoc_ring);

   _mesa_hash_table_destroy(msm_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&msm_submit->ring_pool);

   for (unsigned i = 0; i < msm_submit->nr_bos; i++)
      fd_bo_del(msm_submit->bos[i]);

   free(msm_submit->bos);
   free(msm_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = msm_submit_sp_new_ringbuffer,
   .flush = msm_submit_sp_flush,
   .destroy = msm_submit_sp_destroy,
};

struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
   struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
   struct fd_submit *submit;

   msm_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);

   slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);

   submit = &msm_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
   // TODO tune size:
   slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp),
                      16);
}

void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
   if (msm_pipe->ring_pool.num_elements)
      slab_destroy_parent(&msm_pipe->ring_pool);
}

static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   APPEND(&msm_ring->u, cmds,
          (struct msm_cmd_sp){
             .ring_bo = fd_bo_ref(msm_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_pipe *pipe = msm_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(msm_ring->ring_bo);
   msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(msm_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
msm_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   for (int i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
      if (msm_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

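/* Generate the 32b and 64b variants of the emit_reloc helpers by including
 * msm_ringbuffer_sp.h twice with different PTRSZ; the resulting *_32/*_64
 * functions are plugged into the ring_funcs tables below.
 */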
#define PTRSZ 64
#include "msm_ringbuffer_sp.h"
#undef PTRSZ
#define PTRSZ 32
#include "msm_ringbuffer_sp.h"
#undef PTRSZ

static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
msm_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
   struct fd_submit *submit = msm_ring->u.submit;

   if (to_msm_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);

   fd_bo_del(msm_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(msm_ring->u.reloc_bos[i]);
      }
      free(msm_ring->u.reloc_bos);

      free(msm_ring);
   } else {
      struct fd_submit *submit = msm_ring->u.submit;

      for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
         fd_bo_del(msm_ring->u.cmds[i].ring_bo);
      }
      free(msm_ring->u.cmds);

      slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .check_size = msm_ringbuffer_sp_check_size,
   .destroy = msm_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = msm_ringbuffer_sp_grow,
   .emit_reloc = msm_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = msm_ringbuffer_sp_cmd_count,
   .destroy = msm_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
                       enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &msm_ring->base;

   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);
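   /* This is what allows flush_submit_list() to copy bo->reloc_flags straight
    * into drm_msm_gem_submit_bo::flags without any translation.
    */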

   debug_assert(msm_ring->ring_bo);

   uint8_t *base = fd_bo_map(msm_ring->ring_bo);
   ring->start = (void *)(base + msm_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&msm_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&msm_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for FD_RINGBUFFER_STAGING case..
   msm_ring->u.cmds = NULL;
   msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;

   msm_ring->u.reloc_bos = NULL;
   msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
   struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));

   /* Lock access to the msm_pipe->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   static simple_mtx_t suballoc_lock = _SIMPLE_MTX_INITIALIZER_NP;
   simple_mtx_lock(&suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords (64 bytes) */
   msm_ring->offset = align(msm_pipe->suballoc_offset, 64);
   if (!msm_pipe->suballoc_bo ||
       msm_ring->offset + size > fd_bo_size(msm_pipe->suballoc_bo)) {
      if (msm_pipe->suballoc_bo)
         fd_bo_del(msm_pipe->suballoc_bo);
      msm_pipe->suballoc_bo =
         fd_bo_new_ring(pipe->dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      msm_ring->offset = 0;
   }

   msm_ring->u.pipe = pipe;
   msm_ring->ring_bo = fd_bo_ref(msm_pipe->suballoc_bo);
   msm_ring->base.refcnt = 1;

   msm_pipe->suballoc_offset = msm_ring->offset + size;

   simple_mtx_unlock(&suballoc_lock);

   return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}