/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * Copyright © 2021 Valve Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors:
 *    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
 */

#include "zink_bo.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "util/u_hash_table.h"

struct zink_bo;

struct zink_sparse_backing_chunk {
   uint32_t begin, end;
};


/*
 * Sub-allocation information for a real buffer used as backing memory of a
 * sparse buffer.
 */
struct zink_sparse_backing {
   struct list_head list;

   struct zink_bo *bo;

   /* Sorted list of free chunks. */
   struct zink_sparse_backing_chunk *chunks;
   uint32_t max_chunks;
   uint32_t num_chunks;
};

struct zink_sparse_commitment {
   struct zink_sparse_backing *backing;
   uint32_t page;
};

struct zink_slab {
   struct pb_slab base;
   unsigned entry_size;
   struct zink_bo *buffer;
   struct zink_bo *entries;
};


ALWAYS_INLINE static struct zink_slab *
zink_slab(struct pb_slab *pslab)
{
   return (struct zink_slab*)pslab;
}

static struct pb_slabs *
get_slabs(struct zink_screen *screen, uint64_t size, enum zink_alloc_flag flags)
{
   //struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
      //screen->bo_slabs_encrypted : screen->bo_slabs;

   struct pb_slabs *bo_slabs = screen->pb.bo_slabs;
   /* Find the correct slab allocator for the given size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      struct pb_slabs *slabs = &bo_slabs[i];

      if (size <= 1ULL << (slabs->min_order + slabs->num_orders - 1))
         return slabs;
   }

   assert(0);
   return NULL;
}

/* Return the power of two size of a slab entry matching the input size. */
static unsigned
get_slab_pot_entry_size(struct zink_screen *screen, unsigned size)
{
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << screen->pb.bo_slabs[0].min_order;

   return MAX2(entry_size, min_entry_size);
}

/* Return the slab entry alignment. */
static unsigned get_slab_entry_alignment(struct zink_screen *screen, unsigned size)
{
   unsigned entry_size = get_slab_pot_entry_size(screen, size);

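   /* Entries no larger than 3/4 of a power-of-two bucket only need
    * quarter-bucket alignment, e.g. a 96-byte request lands in a 128-byte
    * bucket with 32-byte alignment; anything bigger gets the full
    * power-of-two alignment.
    */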
   if (size <= entry_size * 3 / 4)
      return entry_size / 4;

   return entry_size;
}

static void
bo_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   simple_mtx_lock(&screen->pb.bo_export_table_lock);
   _mesa_hash_table_remove_key(screen->pb.bo_export_table, bo);
   simple_mtx_unlock(&screen->pb.bo_export_table_lock);

   if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) {
      bo->u.real.map_count = 1;
      bo->u.real.cpu_ptr = NULL;
      zink_bo_unmap(screen, bo);
   }

   VKSCR(FreeMemory)(screen->dev, bo->mem, NULL);

   simple_mtx_destroy(&bo->lock);
   FREE(bo);
}

static bool
bo_can_reclaim(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   return zink_screen_usage_check_completion(screen, bo->reads) && zink_screen_usage_check_completion(screen, bo->writes);
}

static bool
bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct zink_bo *bo = container_of(entry, struct zink_bo, u.slab.entry);

   return bo_can_reclaim(priv, &bo->base);
}

static void
bo_slab_free(struct zink_screen *screen, struct pb_slab *pslab)
{
   struct zink_slab *slab = zink_slab(pslab);
   ASSERTED unsigned slab_size = slab->buffer->base.size;

   assert(slab->base.num_entries * slab->entry_size <= slab_size);
   FREE(slab->entries);
   zink_bo_unref(screen, slab->buffer);
   FREE(slab);
}

static void
bo_slab_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   assert(!bo->mem);

   //if (bo->base.usage & RADEON_FLAG_ENCRYPTED)
      //pb_slab_free(get_slabs(screen, bo->base.size, RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);
   //else
      pb_slab_free(get_slabs(screen, bo->base.size, 0), &bo->u.slab.entry);
}

static void
clean_up_buffer_managers(struct zink_screen *screen)
{
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      pb_slabs_reclaim(&screen->pb.bo_slabs[i]);
      //if (screen->info.has_tmz_support)
         //pb_slabs_reclaim(&screen->bo_slabs_encrypted[i]);
   }

   pb_cache_release_all_buffers(&screen->pb.bo_cache);
}

static unsigned
get_optimal_alignment(struct zink_screen *screen, uint64_t size, unsigned alignment)
{
   /* Increase the alignment for faster address translation and better memory
    * access pattern.
    */
   if (size >= 4096) {
      alignment = MAX2(alignment, 4096);
   } else if (size) {
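      /* For sub-page allocations, round the alignment up to the largest
       * power of two that does not exceed the size (e.g. a 100-byte
       * allocation gets at least 64-byte alignment).
       */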
      unsigned msb = util_last_bit(size);

      alignment = MAX2(alignment, 1u << (msb - 1));
   }
   return alignment;
}

static void
bo_destroy_or_cache(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   assert(bo->mem); /* slab buffers have a separate vtbl */
   bo->reads = NULL;
   bo->writes = NULL;

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(bo->cache_entry);
   else
      bo_destroy(screen, pbuf);
}

static const struct pb_vtbl bo_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_destroy_or_cache
   /* other functions are never called */
};

static struct zink_bo *
bo_create_internal(struct zink_screen *screen,
                   uint64_t size,
                   unsigned alignment,
                   enum zink_heap heap,
                   unsigned flags,
                   const void *pNext)
{
   struct zink_bo *bo;
   bool init_pb_cache;

   /* too big for vk alloc */
   if (size > UINT32_MAX)
      return NULL;

   alignment = get_optimal_alignment(screen, size, alignment);

   VkMemoryAllocateInfo mai;
   mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
   mai.pNext = pNext;
   mai.allocationSize = size;
   mai.memoryTypeIndex = screen->heap_map[heap];
   if (screen->info.mem_props.memoryTypes[mai.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
      alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
      mai.allocationSize = align64(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment);
   }
   unsigned heap_idx = screen->info.mem_props.memoryTypes[screen->heap_map[heap]].heapIndex;
   if (mai.allocationSize > screen->info.mem_props.memoryHeaps[heap_idx].size) {
      mesa_loge("zink: can't allocate %"PRIu64" bytes from heap that's only %"PRIu64" bytes!\n", mai.allocationSize, screen->info.mem_props.memoryHeaps[heap_idx].size);
      return NULL;
   }

   /* all non-suballocated bo can cache */
   init_pb_cache = !pNext;

   bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));
   if (!bo) {
      return NULL;
   }

   if (init_pb_cache) {
      bo->u.real.use_reusable_pool = true;
      pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base, heap);
   }

   VkResult ret = VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &bo->mem);
   if (!zink_screen_handle_vkresult(screen, ret))
      goto fail;

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.size = mai.allocationSize;
   bo->base.vtbl = &bo_vtbl;
   bo->base.placement = vk_domain_from_heap(heap);
   bo->base.usage = flags;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);

   return bo;

fail:
   bo_destroy(screen, (void*)bo);
   return NULL;
}

/*
 * Attempt to allocate the given number of backing pages. Fewer pages may be
 * allocated (depending on the fragmentation of existing backing buffers),
 * which will be reflected by a change to *pnum_pages.
 */
static struct zink_sparse_backing *
sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo,
                     uint32_t *pstart_page, uint32_t *pnum_pages)
{
   struct zink_sparse_backing *best_backing;
   unsigned best_idx;
   uint32_t best_num_pages;

   best_backing = NULL;
   best_idx = 0;
   best_num_pages = 0;

   /* This is a very simple and inefficient best-fit algorithm. */
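   /* Walk every free chunk of every backing buffer: while the current
    * candidate is still smaller than the request, any larger chunk replaces
    * it; once it exceeds the request, any smaller chunk replaces it.
    */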
   list_for_each_entry(struct zink_sparse_backing, backing, &bo->u.sparse.backing, list) {
      for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
         uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
         if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
            (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
            best_backing = backing;
            best_idx = idx;
            best_num_pages = cur_num_pages;
         }
      }
   }

   /* Allocate a new backing buffer if necessary. */
   if (!best_backing) {
      struct pb_buffer *buf;
      uint64_t size;
      uint32_t pages;

      best_backing = CALLOC_STRUCT(zink_sparse_backing);
      if (!best_backing)
         return NULL;

      best_backing->max_chunks = 4;
      best_backing->chunks = CALLOC(best_backing->max_chunks,
                                    sizeof(*best_backing->chunks));
      if (!best_backing->chunks) {
         FREE(best_backing);
         return NULL;
      }

      assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE));

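      /* Size a new backing buffer at 1/16 of the sparse buffer, capped at
       * 8 MiB and at the still-unbacked remainder, but always at least one
       * sparse page.
       */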
      size = MIN3(bo->base.size / 16,
                  8 * 1024 * 1024,
                  bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE);
      size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);

      buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE,
                           ZINK_HEAP_DEVICE_LOCAL, ZINK_ALLOC_NO_SUBALLOC, NULL);
      if (!buf) {
         FREE(best_backing->chunks);
         FREE(best_backing);
         return NULL;
      }

      /* We might have gotten a bigger buffer than requested via caching. */
      pages = buf->size / ZINK_SPARSE_BUFFER_PAGE_SIZE;

      best_backing->bo = zink_bo(buf);
      best_backing->num_chunks = 1;
      best_backing->chunks[0].begin = 0;
      best_backing->chunks[0].end = pages;

      list_add(&best_backing->list, &bo->u.sparse.backing);
      bo->u.sparse.num_backing_pages += pages;

      best_idx = 0;
      best_num_pages = pages;
   }

   *pnum_pages = MIN2(*pnum_pages, best_num_pages);
   *pstart_page = best_backing->chunks[best_idx].begin;
   best_backing->chunks[best_idx].begin += *pnum_pages;

   if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
      memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
              sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
      best_backing->num_chunks--;
   }

   return best_backing;
}

static void
sparse_free_backing_buffer(struct zink_screen *screen, struct zink_bo *bo,
                           struct zink_sparse_backing *backing)
{
   bo->u.sparse.num_backing_pages -= backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;

   list_del(&backing->list);
   zink_bo_unref(screen, backing->bo);
   FREE(backing->chunks);
   FREE(backing);
}

/*
 * Return a range of pages from the given backing buffer back into the
 * free structure.
 */
static bool
sparse_backing_free(struct zink_screen *screen, struct zink_bo *bo,
                    struct zink_sparse_backing *backing,
                    uint32_t start_page, uint32_t num_pages)
{
   uint32_t end_page = start_page + num_pages;
   unsigned low = 0;
   unsigned high = backing->num_chunks;

   /* Find the first chunk with begin >= start_page. */
   while (low < high) {
      unsigned mid = low + (high - low) / 2;

      if (backing->chunks[mid].begin >= start_page)
         high = mid;
      else
         low = mid + 1;
   }

   assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
   assert(low == 0 || backing->chunks[low - 1].end <= start_page);

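   /* Merge the freed range into the sorted chunk list: extend the previous
    * chunk if it ends exactly at start_page (and coalesce with the next chunk
    * if the gap closes), extend the next chunk if it begins at end_page, or
    * otherwise insert a new chunk, growing the array when needed.
    */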
   if (low > 0 && backing->chunks[low - 1].end == start_page) {
      backing->chunks[low - 1].end = end_page;

      if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
         backing->chunks[low - 1].end = backing->chunks[low].end;
         memmove(&backing->chunks[low], &backing->chunks[low + 1],
                 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
         backing->num_chunks--;
      }
   } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
      backing->chunks[low].begin = start_page;
   } else {
      if (backing->num_chunks >= backing->max_chunks) {
         unsigned new_max_chunks = 2 * backing->max_chunks;
         struct zink_sparse_backing_chunk *new_chunks =
            REALLOC(backing->chunks,
                    sizeof(*backing->chunks) * backing->max_chunks,
                    sizeof(*backing->chunks) * new_max_chunks);
         if (!new_chunks)
            return false;

         backing->max_chunks = new_max_chunks;
         backing->chunks = new_chunks;
      }

      memmove(&backing->chunks[low + 1], &backing->chunks[low],
              sizeof(*backing->chunks) * (backing->num_chunks - low));
      backing->chunks[low].begin = start_page;
      backing->chunks[low].end = end_page;
      backing->num_chunks++;
   }

   if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
       backing->chunks[0].end == backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE)
      sparse_free_backing_buffer(screen, bo, backing);

   return true;
}

static void
bo_sparse_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   assert(!bo->mem && bo->base.usage & ZINK_ALLOC_SPARSE);

   while (!list_is_empty(&bo->u.sparse.backing)) {
      sparse_free_backing_buffer(screen, bo,
                                 container_of(bo->u.sparse.backing.next,
                                              struct zink_sparse_backing, list));
   }

   FREE(bo->u.sparse.commitments);
   simple_mtx_destroy(&bo->lock);
   FREE(bo);
}

static const struct pb_vtbl bo_sparse_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_sparse_destroy
   /* other functions are never called */
};

static struct pb_buffer *
bo_sparse_create(struct zink_screen *screen, uint64_t size)
{
   struct zink_bo *bo;

   /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
    * that exceed this limit. This is not really a restriction: we don't have
    * that much virtual address space anyway.
    */
   if (size > (uint64_t)INT32_MAX * ZINK_SPARSE_BUFFER_PAGE_SIZE)
      return NULL;

   bo = CALLOC_STRUCT(zink_bo);
   if (!bo)
      return NULL;

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->base.size = size;
   bo->base.vtbl = &bo_sparse_vtbl;
   bo->base.placement = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
   bo->base.usage = ZINK_ALLOC_SPARSE;

   bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
                                     sizeof(*bo->u.sparse.commitments));
   if (!bo->u.sparse.commitments)
      goto error_alloc_commitments;

   list_inithead(&bo->u.sparse.backing);

   return &bo->base;

error_alloc_commitments:
   simple_mtx_destroy(&bo->lock);
   FREE(bo);
   return NULL;
}

struct pb_buffer *
zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext)
{
   struct zink_bo *bo;
   /* pull in sparse flag */
   flags |= zink_alloc_flags_from_heap(heap);

   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
      //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;

   struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
   unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & (ZINK_ALLOC_NO_SUBALLOC | ZINK_ALLOC_SPARSE)) &&
       size <= max_slab_entry_size) {
      struct pb_slab_entry *entry;

      if (heap < 0 || heap >= ZINK_HEAP_MAX)
         goto no_slab;

      unsigned alloc_size = size;

      /* Always use slabs for sizes less than 4 KB because the kernel aligns
       * everything to 4 KB.
       */
      if (size < alignment && alignment <= 4 * 1024)
         alloc_size = alignment;

      if (alignment > get_slab_entry_alignment(screen, alloc_size)) {
         /* 3/4 allocations can return too small alignment. Try again with a power of two
          * allocation size.
          */
         unsigned pot_size = get_slab_pot_entry_size(screen, alloc_size);

         if (alignment <= pot_size) {
            /* This size works but wastes some memory to fulfil the alignment. */
            alloc_size = pot_size;
         } else {
            goto no_slab; /* can't fulfil alignment requirements */
         }
      }

      struct pb_slabs *slabs = get_slabs(screen, alloc_size, flags);
      entry = pb_slab_alloc(slabs, alloc_size, heap);
      if (!entry) {
         /* Clean up buffer managers and try again. */
         clean_up_buffer_managers(screen);

         entry = pb_slab_alloc(slabs, alloc_size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct zink_bo, u.slab.entry);
      pipe_reference_init(&bo->base.reference, 1);
      bo->base.size = size;
      assert(alignment <= 1 << bo->base.alignment_log2);

      return &bo->base;
   }
no_slab:

   if (flags & ZINK_ALLOC_SPARSE) {
      assert(ZINK_SPARSE_BUFFER_PAGE_SIZE % alignment == 0);

      return bo_sparse_create(screen, size);
   }

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE) {
      size = align64(size, screen->info.props.limits.minMemoryMapAlignment);
      alignment = align(alignment, screen->info.props.limits.minMemoryMapAlignment);
   }

   bool use_reusable_pool = !(flags & ZINK_ALLOC_NO_SUBALLOC);

   if (use_reusable_pool) {
       /* Get a buffer from the cache. */
       bo = (struct zink_bo*)
            pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, heap);
       if (bo)
          return &bo->base;
   }

   /* Create a new one. */
   bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
   if (!bo) {
      /* Clean up buffer managers and try again. */
      clean_up_buffer_managers(screen);

      bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
      if (!bo)
         return NULL;
   }

   return &bo->base;
}

void *
zink_bo_map(struct zink_screen *screen, struct zink_bo *bo)
{
   void *cpu = NULL;
   uint64_t offset = 0;
   struct zink_bo *real;

   if (bo->mem) {
      real = bo;
   } else {
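      /* Slab entries have no VkDeviceMemory of their own: map through the
       * parent "real" BO and offset into that mapping.
       */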
      real = bo->u.slab.real;
      offset = bo->offset - real->offset;
   }

   cpu = p_atomic_read(&real->u.real.cpu_ptr);
   if (!cpu) {
      simple_mtx_lock(&real->lock);
      /* Must re-check due to the possibility of a race. Re-check need not
       * be atomic thanks to the lock. */
      cpu = real->u.real.cpu_ptr;
      if (!cpu) {
         VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.size, 0, &cpu);
         if (result != VK_SUCCESS) {
            simple_mtx_unlock(&real->lock);
            return NULL;
         }
         p_atomic_set(&real->u.real.cpu_ptr, cpu);
      }
      simple_mtx_unlock(&real->lock);
   }
   p_atomic_inc(&real->u.real.map_count);

   return (uint8_t*)cpu + offset;
}

void
zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo)
{
   struct zink_bo *real = bo->mem ? bo : bo->u.slab.real;

   assert(real->u.real.map_count != 0 && "too many unmaps");

   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      p_atomic_set(&real->u.real.cpu_ptr, NULL);
      VKSCR(UnmapMemory)(screen->dev, real->mem);
   }
}

static bool
do_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t offset, uint32_t size, bool commit)
{
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.bufferBindCount = 1;

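   /* Submit a single vkQueueBindSparse with one buffer bind: when committing,
    * [offset, offset + size) of the buffer (clipped to the resource size) is
    * bound to the start of bo->mem; when uncommitting, binding VK_NULL_HANDLE
    * unbinds the range.
    */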
   VkSparseBufferMemoryBindInfo sparse_bind;
   sparse_bind.buffer = res->obj->buffer;
   sparse_bind.bindCount = 1;
   sparse.pBufferBinds = &sparse_bind;

   VkSparseMemoryBind mem_bind;
   mem_bind.resourceOffset = offset;
   mem_bind.size = MIN2(res->base.b.width0 - offset, size);
   mem_bind.memory = commit ? bo->mem : VK_NULL_HANDLE;
   mem_bind.memoryOffset = 0;
   mem_bind.flags = 0;
   sparse_bind.pBinds = &mem_bind;

   VkQueue queue = screen->threaded ? screen->thread_queue : screen->queue;

   simple_mtx_lock(&screen->queue_lock);
   VkResult ret = VKSCR(QueueBindSparse)(queue, 1, &sparse, VK_NULL_HANDLE);
   simple_mtx_unlock(&screen->queue_lock);
   return zink_screen_handle_vkresult(screen, ret);
}

bool
zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit)
{
   bool ok = true;
   struct zink_bo *bo = res->obj->bo;
   assert(offset % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0);
   assert(offset <= bo->base.size);
   assert(size <= bo->base.size - offset);
   assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == bo->base.size);

   struct zink_sparse_commitment *comm = bo->u.sparse.commitments;

   uint32_t va_page = offset / ZINK_SPARSE_BUFFER_PAGE_SIZE;
   uint32_t end_va_page = va_page + DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);

   simple_mtx_lock(&bo->lock);

   if (commit) {
      while (va_page < end_va_page) {
         uint32_t span_va_page;

         /* Skip pages that are already committed. */
         if (comm[va_page].backing) {
            va_page++;
            continue;
         }

         /* Determine length of uncommitted span. */
         span_va_page = va_page;
         while (va_page < end_va_page && !comm[va_page].backing)
            va_page++;

         /* Fill the uncommitted span with chunks of backing memory. */
         while (span_va_page < va_page) {
            struct zink_sparse_backing *backing;
            uint32_t backing_start, backing_size;

            backing_size = va_page - span_va_page;
            backing = sparse_backing_alloc(screen, bo, &backing_start, &backing_size);
            if (!backing) {
               ok = false;
               goto out;
            }
            if (!do_commit_single(screen, res, backing->bo,
                                  (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                                  (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true)) {

               ok = sparse_backing_free(screen, bo, backing, backing_start, backing_size);
               assert(ok && "sufficient memory should already be allocated");

               ok = false;
               goto out;
            }

            while (backing_size) {
               comm[span_va_page].backing = backing;
               comm[span_va_page].page = backing_start;
               span_va_page++;
               backing_start++;
               backing_size--;
            }
         }
      }
   } else {
      if (!do_commit_single(screen, res, NULL,
                            (uint64_t)va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                            (uint64_t)(end_va_page - va_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false)) {
         ok = false;
         goto out;
      }

      while (va_page < end_va_page) {
         struct zink_sparse_backing *backing;
         uint32_t backing_start;
         uint32_t span_pages;

         /* Skip pages that are already uncommitted. */
         if (!comm[va_page].backing) {
            va_page++;
            continue;
         }

         /* Group contiguous spans of pages. */
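         /* VA pages backed by consecutive pages of the same backing buffer
          * are returned with a single sparse_backing_free() call.
          */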
         backing = comm[va_page].backing;
         backing_start = comm[va_page].page;
         comm[va_page].backing = NULL;

         span_pages = 1;
         va_page++;

         while (va_page < end_va_page &&
                comm[va_page].backing == backing &&
                comm[va_page].page == backing_start + span_pages) {
            comm[va_page].backing = NULL;
            va_page++;
            span_pages++;
         }

         if (!sparse_backing_free(screen, bo, backing, backing_start, span_pages)) {
            /* Couldn't allocate tracking data structures, so we have to leak */
            fprintf(stderr, "zink: leaking sparse backing memory\n");
            ok = false;
         }
      }
   }
out:

   simple_mtx_unlock(&bo->lock);
   return ok;
}

static const struct pb_vtbl bo_slab_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_slab_destroy
   /* other functions are never called */
};

static struct pb_slab *
bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index, bool encrypted)
{
   struct zink_screen *screen = priv;
   VkMemoryPropertyFlags domains = vk_domain_from_heap(heap);
   uint32_t base_id;
   unsigned slab_size = 0;
   struct zink_slab *slab = CALLOC_STRUCT(zink_slab);

   if (!slab)
      return NULL;

   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
      //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;

   /* Determine the slab buffer size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);

      if (entry_size <= max_entry_size) {
         /* The slab size is twice the size of the largest possible entry. */
         slab_size = max_entry_size * 2;

         if (!util_is_power_of_two_nonzero(entry_size)) {
            assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));

            /* If the entry size is 3/4 of a power of two, we would waste space and not gain
             * anything if we allocated only twice the power of two for the backing buffer:
             *   2 * 3/4 = 1.5 usable with buffer size 2
             *
             * Allocating 5 times the entry size leads us to the next power of two and results
             * in a much better memory utilization:
             *   5 * 3/4 = 3.75 usable with buffer size 4
             */
            if (entry_size * 5 > slab_size)
               slab_size = util_next_power_of_two(entry_size * 5);
         }

         break;
      }
   }
   assert(slab_size != 0);

   slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, heap, 0, NULL));
   if (!slab->buffer)
      goto fail;

   slab_size = slab->buffer->base.size;

   slab->base.num_entries = slab_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

#ifdef _MSC_VER
   /* C11 too hard for msvc, no __sync_fetch_and_add */
   base_id = p_atomic_add_return(&screen->pb.next_bo_unique_id, slab->base.num_entries) - slab->base.num_entries;
#else
   base_id = __sync_fetch_and_add(&screen->pb.next_bo_unique_id, slab->base.num_entries);
#endif
   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct zink_bo *bo = &slab->entries[i];

      simple_mtx_init(&bo->lock, mtx_plain);
      bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size));
      bo->base.size = entry_size;
      bo->base.vtbl = &bo_slab_vtbl;
      bo->offset = slab->buffer->offset + i * entry_size;
      bo->base.placement = domains;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;

      if (slab->buffer->mem) {
         /* The slab is not suballocated. */
         bo->u.slab.real = slab->buffer;
      } else {
         /* The slab is allocated out of a bigger slab. */
         bo->u.slab.real = slab->buffer->u.slab.real;
         assert(bo->u.slab.real->mem);
      }

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   /* Wasted alignment due to slabs with 3/4 allocations being aligned to a power of two. */
   assert(slab->base.num_entries * entry_size <= slab_size);

   return &slab->base;

fail_buffer:
   zink_bo_unref(screen, slab->buffer);
fail:
   FREE(slab);
   return NULL;
}

static struct pb_slab *
bo_slab_alloc_normal(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
{
   return bo_slab_alloc(priv, heap, entry_size, group_index, false);
}

bool
zink_bo_init(struct zink_screen *screen)
{
   uint64_t total_mem = 0;
   for (uint32_t i = 0; i < screen->info.mem_props.memoryHeapCount; ++i)
      total_mem += screen->info.mem_props.memoryHeaps[i].size;
   /* Create managers. */
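   /* Cache parameters below (assuming the common pb_cache semantics): cached
    * buffers expire after 500000 usec (0.5 s), a request may reuse a cached
    * buffer up to 2x the requested size, and the cache is capped at 1/8 of
    * total device memory.
    */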
   pb_cache_init(&screen->pb.bo_cache, ZINK_HEAP_MAX,
                 500000, 2.0f, 0,
                 total_mem / 8, screen,
                 (void*)bo_destroy, (void*)bo_can_reclaim);

   unsigned min_slab_order = 8;  /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                            NUM_SLAB_ALLOCATORS;

   /* Divide the size order range among slab managers. */
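   /* e.g. if NUM_SLAB_ALLOCATORS is 3 (an assumption here; the constant is
    * defined elsewhere), the ranges come out as orders 8-12, 13-17 and 18-20.
    */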
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
                                max_slab_order);

      if (!pb_slabs_init(&screen->pb.bo_slabs[i],
                         min_order, max_order,
                         ZINK_HEAP_MAX, true,
                         screen,
                         bo_can_reclaim_slab,
                         bo_slab_alloc_normal,
                         (void*)bo_slab_free)) {
         return false;
      }
      min_slab_order = max_order + 1;
   }
   screen->pb.min_alloc_size = 1 << screen->pb.bo_slabs[0].min_order;
   screen->pb.bo_export_table = util_hash_table_create_ptr_keys();
   simple_mtx_init(&screen->pb.bo_export_table_lock, mtx_plain);
   return true;
}

void
zink_bo_deinit(struct zink_screen *screen)
{
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (screen->pb.bo_slabs[i].groups)
         pb_slabs_deinit(&screen->pb.bo_slabs[i]);
   }
   pb_cache_deinit(&screen->pb.bo_cache);
   _mesa_hash_table_destroy(screen->pb.bo_export_table, NULL);
   simple_mtx_destroy(&screen->pb.bo_export_table_lock);
}