/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/simple_list.h"
#include "os/os_thread.h"
#include "os/os_mman.h"
#include "util/os_time.h"

#include "frontend/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
   return (struct radeon_bo *)bo;
}

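/* A free range ("hole") in one heap of the GPU virtual address space.
 * Holes live on heap->holes and are recycled by radeon_bomgr_find_va()
 * and radeon_bomgr_free_va() below. */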
struct radeon_bo_va_hole {
   struct list_head list;
   uint64_t         offset;
   uint64_t         size;
};

static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
   struct drm_radeon_gem_busy args = {0};

   args.handle = bo->handle;
   return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                              &args, sizeof(args)) != 0;
}

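/* Check whether a buffer is busy. Real BOs ask the kernel directly; a slab
 * entry is busy while any of the fences (real BOs) it was last used with is
 * still busy. Fences that are already idle are dropped as a side effect. */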
static bool radeon_bo_is_busy(struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      radeon_ws_bo_reference(&bo->u.slab.fences[num_idle], NULL);
   }
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
         (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}

static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
   struct drm_radeon_gem_wait_idle args = {0};

   args.handle = bo->handle;
   while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                          &args, sizeof(args)) == -EBUSY);
}

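/* Block until the buffer is idle. For slab entries this waits on each
 * attached fence in turn, dropping bo_fence_lock while waiting so that
 * other threads can keep adding or removing fences in the meantime. */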
static void radeon_bo_wait_idle(struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         radeon_ws_bo_reference(&fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(&bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                  (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(&fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}

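/* winsys buffer_wait entry point. A timeout of 0 is a non-blocking busy
 * query, PIPE_TIMEOUT_INFINITE waits until the buffer is idle, and any
 * other timeout is emulated by polling the busy status. Returns true if
 * the buffer is idle when the function returns. */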
static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer *_buf, uint64_t timeout,
                           unsigned usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == PIPE_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
   /* Zero domains the driver doesn't understand. */
   domain &= RADEON_DOMAIN_VRAM_GTT;

   /* If no domain is set, we must set something... */
   if (!domain)
      domain = RADEON_DOMAIN_VRAM_GTT;

   return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct drm_radeon_gem_op args;

   if (bo->rws->info.drm_minor < 38)
      return RADEON_DOMAIN_VRAM_GTT;

   memset(&args, 0, sizeof(args));
   args.handle = bo->handle;
   args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
              bo, bo->handle);
      /* Default domain as returned by get_valid_domain. */
      return RADEON_DOMAIN_VRAM_GTT;
   }

   /* GEM domains and winsys domains are defined the same. */
   return get_valid_domain(args.value);
}

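/* First-fit allocator for the GPU virtual address space of one heap.
 * It scans the hole list for a range that satisfies size and alignment,
 * splitting or shrinking the hole as needed, and falls back to bumping
 * heap->start. Returns 0 on failure; callers treat 0 as "no address". */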
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}

static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va = 0;

   /* Try to allocate from the 64-bit address space first.
    * If it doesn't exist (start = 0) or if it doesn't have enough space,
    * fall back to the 32-bit address space.
    */
   if (ws->vm64.start)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
   if (!va)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
   return va;
}

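/* Return a virtual address range to its heap. The range either lowers
 * heap->start again or becomes a hole; adjacent holes are merged to keep
 * the free list compact. The hole list is kept ordered from high to low
 * offsets, which the adjacency checks below rely on. */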
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}

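/* Destroy a real (non-slab) BO: drop it from the handle/name tables, unmap
 * any CPU mapping, release its virtual address range, close the GEM handle
 * and update the memory accounting. */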
void radeon_bo_destroy(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
         }
      }

      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(bo->handle && "must not be called for slab entries");

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      radeon_bo_destroy(NULL, _buf);
}

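/* Map a BO for CPU access and return the pointer. Slab entries are mapped
 * through their backing real BO at the corresponding offset. Mappings are
 * reference counted, so repeated maps reuse the existing mmap. */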
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}

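/* winsys buffer_map entry point. Unless PIPE_MAP_UNSYNCHRONIZED is set,
 * this first makes sure the GPU is done with the buffer: it flushes the
 * command stream if it references the buffer in a conflicting way and then
 * either fails immediately (PIPE_MAP_DONTBLOCK) or blocks until the buffer
 * is idle. Mapping for read only needs to wait for GPU writes. */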
static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}

static const struct pb_vtbl radeon_bo_vtbl = {
   radeon_bo_destroy_or_cache
   /* other functions are never called */
};

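/* Allocate a new real BO through DRM_RADEON_GEM_CREATE and, when the kernel
 * supports virtual memory, assign it a GPU virtual address via
 * DRM_RADEON_GEM_VA. If the kernel reports that the address is already
 * mapped to another BO (RADEON_VA_RESULT_VA_EXIST), that existing BO is
 * returned instead. */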
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon:    size      : %u bytes\n", size);
      fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
      fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon:    size      : %d bytes\n", size);
         fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
         fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}

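/* pb_cache/pb_slabs callback: a buffer may only be reclaimed if no CS still
 * references it and a non-blocking wait reports it idle. */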
bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   if (radeon_bo_is_referenced_by_any_cs(bo))
      return false;

   return radeon_bo_wait(winsys, _buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);

   return radeon_bo_can_reclaim(NULL, &bo->base);
}

static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(!bo->handle);

   pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl radeon_winsys_bo_slab_vtbl = {
   radeon_bo_slab_destroy
   /* other functions are never called */
};

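/* pb_slabs callback: back a new slab with a 64 KB real BO and carve it into
 * equally sized suballocated entries that share the parent buffer's virtual
 * address range. */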
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &radeon_winsys_bo_slab_vtbl;
      bo->rws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct radeon_slab *slab = (struct radeon_slab *)pslab;

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];
      for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
         radeon_ws_bo_reference(&bo->u.slab.fences[j], NULL);
      FREE(bo->u.slab.fences);
   }

   FREE(slab->entries);
   radeon_ws_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

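/* Translate between the tile-split encoding stored in the tiling flags
 * (0..6) and the tile split size in bytes (64..4096), and back. */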
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:     tile_split = 64;    break;
   case 1:     tile_split = 128;   break;
   case 2:     tile_split = 256;   break;
   case 3:     tile_split = 512;   break;
   default:
   case 4:     tile_split = 1024;  break;
   case 5:     tile_split = 2048;  break;
   case 6:     tile_split = 4096;  break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}

static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}

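/* winsys buffer_create entry point. Small buffers are suballocated from
 * slabs when possible; everything else goes through the pb_cache-backed
 * reusable pool (unless the buffer may be shared between processes) or a
 * fresh GEM allocation. */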
static struct pb_buffer *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int heap = -1;

   assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */

   /* Only 32-bit sizes are supported. */
   if (size > UINT_MAX)
      return NULL;

   /* VRAM implies WC. This is not optional. */
   if (domain & RADEON_DOMAIN_VRAM)
      flags |= RADEON_FLAG_GTT_WC;
   /* NO_CPU_ACCESS is valid with VRAM only. */
   if (domain != RADEON_DOMAIN_VRAM)
      flags &= ~RADEON_FLAG_NO_CPU_ACCESS;

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & RADEON_FLAG_NO_SUBALLOC) &&
       size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
       ws->info.r600_has_virtual_memory &&
       alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      int heap = radeon_get_heap_index(domain, flags);

      if (heap < 0 || heap >= RADEON_MAX_SLAB_HEAPS)
         goto no_slab;

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct radeon_bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_NO_SUBALLOC;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING;

   /* Shared resources don't use cached heaps. */
   if (use_reusable_pool) {
      heap = radeon_get_heap_index(domain, flags);
      assert(heap >= 0 && heap < RADEON_MAX_CACHED_HEAPS);

      bo = radeon_bo(pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment,
                                             0, heap));
      if (bo)
         return &bo->base;
   }

   bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clear the cache and try again. */
      if (ws->info.r600_has_virtual_memory)
         pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;

   mtx_lock(&ws->bo_handles_mutex);
   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
   mtx_unlock(&ws->bo_handles_mutex);

   return &bo->base;
}

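/* Create a BO wrapping anonymous user memory via DRM_RADEON_GEM_USERPTR.
 * The resulting buffer lives in GTT and keeps the user pointer so that
 * radeon_bo_do_map can return it directly. */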
static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                   void *pointer, uint64_t size)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct drm_radeon_gem_userptr args;
   struct radeon_bo *bo;
   int r;

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   memset(&args, 0, sizeof(args));
   args.addr = (uintptr_t)pointer;
   args.size = align(size, ws->info.gart_page_size);
   args.flags = RADEON_GEM_USERPTR_ANONONLY |
                RADEON_GEM_USERPTR_VALIDATE |
                RADEON_GEM_USERPTR_REGISTER;
   if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                           &args, sizeof(args))) {
      FREE(bo);
      return NULL;
   }

   assert(args.handle != 0);

   mtx_lock(&ws->bo_handles_mutex);

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->handle = args.handle;
   bo->base.alignment_log2 = 0;
   bo->base.size = size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->user_ptr = pointer;
   bo->va = 0;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;
}

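/* Import a buffer from a flink name or dma-buf fd. The bo_names/bo_handles
 * tables guarantee that one GEM handle is only ever wrapped by a single
 * radeon_bo; the comment below explains why that matters. */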
static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned vm_alignment,
                                                      bool is_dri_prime_linear_buffer)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      struct pb_buffer *b = NULL;
      pb_reference(&b, &bo->base);
      goto done;
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   bo->base.size = (unsigned) size;
   bo->base.vtbl = &radeon_bo_vtbl;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         pb_reference(&b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}

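/* Export a buffer as a flink name, KMS handle or dma-buf fd. Slab entries
 * cannot be exported, and exported buffers are no longer returned to the
 * reusable pool when they are destroyed. */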
static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
                                        struct pb_buffer *buffer,
                                        struct winsys_handle *whandle)
{
   struct drm_gem_flink flink;
   struct radeon_bo *bo = radeon_bo(buffer);
   struct radeon_drm_winsys *ws = bo->rws;

   /* Don't allow exports of slab entries. */
   if (!bo->handle)
      return false;

   memset(&flink, 0, sizeof(flink));

   bo->u.real.use_reusable_pool = false;

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      if (!bo->flink_name) {
         flink.handle = bo->handle;

         if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
            return false;
         }

         bo->flink_name = flink.name;

         mtx_lock(&ws->bo_handles_mutex);
         _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
         mtx_unlock(&ws->bo_handles_mutex);
      }
      whandle->handle = bo->flink_name;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      whandle->handle = bo->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
         return false;
   }

   return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static bool radeon_winsys_bo_is_suballocated(struct pb_buffer *buf)
{
   return !((struct radeon_bo*)buf)->handle;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer *buf)
{
   return ((struct radeon_bo*)buf)->va;
}

static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      return 0;

   return bo->va - bo->u.slab.real->va;
}

void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.buffer_set_metadata = radeon_bo_set_metadata;
   ws->base.buffer_get_metadata = radeon_bo_get_metadata;
   ws->base.buffer_map = radeon_bo_map;
   ws->base.buffer_unmap = radeon_bo_unmap;
   ws->base.buffer_wait = radeon_bo_wait;
   ws->base.buffer_create = radeon_winsys_bo_create;
   ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
   ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
   ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
   ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
   ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
   ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
   ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}