/*
 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */
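/* With softpin, each bo keeps a stable GPU address (iova) for its lifetime, so
 * the cmdstream can embed addresses directly instead of emitting reloc entries
 * for the kernel to patch.  The submit only needs to tell the kernel which
 * bo's are referenced (the bos table) for residency and fencing.
 */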

#define INIT_SIZE 0x1000

#define SUBALLOC_SIZE (32 * 1024)

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * so instead we use a condition variable.  Note that pipe->flush() is not
 * expected to be a common/hot path.
 */
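/* The condition is signalled by fd_submit_sp_flush_execute() on the submit-
 * queue thread as each submit reaches the kernel, and waited on in
 * fd_pipe_sp_flush() until pipe->last_submit_fence catches up.
 */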
static pthread_cond_t  flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   uint32_t idx;

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
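   /* Fast path: bo->idx caches the index this bo was assigned in the last
    * submit it was added to.  It may be stale (or belong to a different
    * submit), so only trust it after verifying submit->bos[idx] == bo;
    * otherwise fall back to the hash-table lookup below.
    */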
   idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

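/* Streaming ringbuffers are suballocated from a shared ring bo owned by the
 * submit: each new streaming ring carves out a 16-byte aligned chunk after
 * the previous one, and a fresh SUBALLOC_SIZE bo is allocated once the next
 * allocation no longer fits.
 */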
static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, 0x10);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = INIT_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer; at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper-level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_submit_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   simple_mtx_lock(&table_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], submit->pipe, submit->fence);
      has_shared |= fd_submit->bos[i]->shared;
   }
   simple_mtx_unlock(&table_lock);

   fd_submit->out_fence   = out_fence;
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static int
enqueue_submit_list(struct list_head *submit_list)
{
   struct fd_submit *submit = last_submit(submit_list);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   list_replace(submit_list, &fd_submit->submit_list);
   list_inithead(submit_list);

   struct util_queue_fence *fence;
   if (fd_submit->out_fence) {
      fence = &fd_submit->out_fence->ready;
   } else {
      util_queue_fence_init(&fd_submit->fence);
      fence = &fd_submit->fence;
   }

   DEBUG_MSG("enqueue: %u", submit->fence);

   util_queue_add_job(&submit->pipe->dev->submit_queue,
                      submit, fence,
                      fd_submit_sp_flush_execute,
                      fd_submit_sp_flush_cleanup,
                      0);

   return 0;
}

static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with a 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer.  Beyond that we would
    * deadlock writing into the RB (ie. the kernel never finishes writing into
    * the RB, so it never kicks the GPU to start consuming from the RB).
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

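/* Submits are normally appended to dev->deferred_submits and flushed lazily,
 * so that multiple logical submits can be merged into a single kernel
 * submission.  An in-fence, an out-fence, a shared (exported) bo, or
 * exceeding the should_defer() heuristics forces the whole deferred list to
 * be enqueued to the submit queue immediately.
 */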
static int
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                   struct fd_submit_fence *out_fence)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueues (ie. they
    * could have different priority, etc).
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      struct list_head submit_list;

      list_replace(&dev->deferred_submits, &submit_list);
      list_inithead(&dev->deferred_submits);
      dev->deferred_cmds = 0;

      enqueue_submit_list(&submit_list);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return 0;
   }

   struct list_head submit_list;

   list_replace(&dev->deferred_submits, &submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   simple_mtx_unlock(&dev->submit_lock);

   return enqueue_submit_list(&submit_list);
}

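/* Flush any deferred submits on this pipe up to (and including) the requested
 * fence, then wait for the submit-queue thread to hand them off to the kernel.
 */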
void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;
   struct list_head submit_list;

   DEBUG_MSG("flush: %u", fence);

   list_inithead(&submit_list);

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
       * since each fd_pipe is an independent timeline.
       */
      if (deferred_submit->pipe != pipe)
         break;

      if (fd_fence_after(deferred_submit->fence, fence))
         break;

      list_del(&deferred_submit->node);
      list_addtail(&deferred_submit->node, &submit_list);
      dev->deferred_cmds -= fd_ringbuffer_cmd_count(deferred_submit->primary);
   }

   assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));

   simple_mtx_unlock(&dev->submit_lock);

   if (list_is_empty(&submit_list))
      goto flush_sync;

   enqueue_submit_list(&submit_list);

flush_sync:
   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);

   // TODO it would be nice to have a way to debug_assert() that all
   // rb's have been freed back to the slab, because failing that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   for (unsigned i = 0; i < fd_submit->nr_bos; i++)
      fd_bo_del(fd_submit->bos[i]);

   free(fd_submit->bos);
   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

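/* Capture what has been written to the current ring bo so far (ring->start up
 * to ring->cur) as an entry in the ring's cmds table, taking a reference on
 * the backing bo.  Called when the submit is flushed, and by _grow() before
 * switching to a new, larger bo.
 */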
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

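   /* finalize_current_cmd() took its own reference on the current bo, so
    * dropping ours here doesn't free it out from under the pending cmds.
    */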
   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

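/* freedreno_ringbuffer_sp_reloc.h is included twice to stamp out 32b and 64b
 * variants of the emit_reloc helpers (suffixed _32/_64), which are selected
 * at runtime in fd_ringbuffer_sp_init() based on whether the device uses
 * 64b addresses.
 */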
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      for (unsigned i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
         fd_bo_del(fd_ring->u.reloc_bos[i]);
      }
      free(fd_ring->u.reloc_bos);

      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   debug_assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_dev_64b(&fd_ring->u.pipe->dev_id)) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_dev_64b(&fd_ring->u.submit->pipe->dev_id)) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since they are unneeded for the FD_RINGBUFFER_STREAMING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;

   return ring;
}

struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the fd_device->suballoc_* since ringbuffer object
    * allocation can happen both on the frontend (most CSOs) and the driver
    * thread (a6xx cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   /* Maximum known alignment requirement is a6xx's TEX_CONST at 16 dwords (64 bytes) */
   fd_ring->offset = align(dev->suballoc_offset, 64);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, 4096)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}