/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>

#include "util/u_atomic.h"
#include "util/u_memory.h"
#include "util/u_math.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

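/* Thin wrapper around amdgpu_bo_va_op_raw(): derives the VM page flags from
 * the winsys buffer flags (writeable unless READ_ONLY, uncached MTYPE for
 * VA_UNCACHED on GFX9+), page-aligns the size and applies the requested
 * map/unmap operation on the GPU VA range.
 */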
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
		     amdgpu_bo_handle bo,
		     uint64_t offset,
		     uint64_t size,
		     uint64_t addr,
		     uint32_t bo_flags,
		     uint64_t internal_flags,
		     uint32_t ops)
{
	uint64_t flags = internal_flags;
	if (bo) {
		flags = AMDGPU_VM_PAGE_READABLE |
		        AMDGPU_VM_PAGE_EXECUTABLE;

		if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
		    ws->info.chip_class >= GFX9)
			flags |= AMDGPU_VM_MTYPE_UC;

		if (!(bo_flags & RADEON_FLAG_READ_ONLY))
			flags |= AMDGPU_VM_PAGE_WRITEABLE;
	}

	size = align64(size, getpagesize());

	return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
				   flags, ops);
}

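/* Map one range of a virtual (sparse) buffer into its VA space. Takes a
 * reference on the backing BO; ranges without a backing BO become PRT
 * mappings when the kernel supports sparse VM mappings.
 */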
static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		internal_flags |= AMDGPU_VM_PAGE_PRT;
	} else
		p_atomic_inc(&range->bo->ref_count);

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0,
				     internal_flags, AMDGPU_VA_OP_MAP);
	if (r)
		abort();
}

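/* Unmap one range of a virtual buffer and drop the reference that
 * radv_amdgpu_winsys_virtual_map() took on the backing BO, if any.
 */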
static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
                                 const struct radv_amdgpu_map_range *range)
{
	uint64_t internal_flags = 0;
	assert(range->size);

	if (!range->bo) {
		if (!bo->ws->info.has_sparse_vm_mappings)
			return;

		/* Even though this is an unmap, if we don't set this flag,
		 * AMDGPU is going to complain about the missing buffer. */
		internal_flags |= AMDGPU_VM_PAGE_PRT;
	}

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo ? range->bo->bo : NULL,
				     range->bo_offset, range->size,
				     range->offset + bo->base.va, 0, internal_flags,
				     AMDGPU_VA_OP_UNMAP);
	if (r)
		abort();

	if (range->bo)
		radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

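/* Order buffers by pointer value so that duplicates end up adjacent and can
 * be skipped when rebuilding the deduplicated backing-BO list below.
 */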
static int bo_comparator(const void *ap, const void *bp) {
	struct radv_amdgpu_winsys_bo *a = *(struct radv_amdgpu_winsys_bo *const *)ap;
	struct radv_amdgpu_winsys_bo *b = *(struct radv_amdgpu_winsys_bo *const *)bp;
	return (a > b) ? 1 : (a < b) ? -1 : 0;
}

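/* Rebuild the flat, deduplicated array of backing BOs referenced by the
 * ranges of a virtual buffer, so the rest of the winsys (e.g. the BO-list
 * code used at submission) can reference every backing BO of a sparse buffer.
 */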
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
	if (bo->bo_capacity < bo->range_count) {
		uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
		struct radv_amdgpu_winsys_bo **bos =
			realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
		if (!bos)
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		bo->bos = bos;
		bo->bo_capacity = new_count;
	}

	uint32_t temp_bo_count = 0;
	for (uint32_t i = 0; i < bo->range_count; ++i)
		if (bo->ranges[i].bo)
			bo->bos[temp_bo_count++] = bo->ranges[i].bo;

	qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

	/* A fully unbound virtual buffer has no backing BOs at all. */
	uint32_t final_bo_count = temp_bo_count ? 1 : 0;
	for (uint32_t i = 1; i < temp_bo_count; ++i)
		if (bo->bos[i] != bo->bos[i - 1])
			bo->bos[final_bo_count++] = bo->bos[i];

	bo->bo_count = final_bo_count;

	return VK_SUCCESS;
}

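/* Bind [offset, offset + size) of the virtual buffer _parent to _bo at
 * bo_offset (or unbind it when _bo is NULL). The parent keeps a sorted list
 * of non-overlapping ranges covering its whole VA space; this function finds
 * the ranges touched by the new bind, merges or splits them as needed,
 * remaps the affected parts and then rebuilds the backing-BO list.
 */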
static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
                                   uint64_t offset, uint64_t size,
                                   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
	int range_count_delta, new_idx;
	int first = 0, last;
	struct radv_amdgpu_map_range new_first, new_last;
	VkResult result;

	assert(parent->is_virtual);
	assert(!bo || !bo->is_virtual);

	/* We have at most 2 new ranges (1 by the bind, and another one by
	 * splitting a range that contains the newly bound range). */
	if (parent->range_capacity - parent->range_count < 2) {
		uint32_t range_capacity = parent->range_capacity + 2;
		struct radv_amdgpu_map_range *ranges =
			realloc(parent->ranges,
				range_capacity * sizeof(struct radv_amdgpu_map_range));
		if (!ranges)
			return VK_ERROR_OUT_OF_HOST_MEMORY;
		parent->ranges = ranges;
		parent->range_capacity = range_capacity;
	}

	/*
	 * [first, last] is exactly the range of ranges that either overlap the
	 * new parent, or are adjacent to it. This corresponds to the bind ranges
	 * that may change.
	 */
	while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
		++first;

	last = first;
	while (last + 1 < parent->range_count && parent->ranges[last].offset <= offset + size)
		++last;

	/* Whether the first or last range is going to be totally removed or just
	 * resized/left alone. Note that in the case of first == last, we will split
	 * this range into a part before and a part after the new range. The remove
	 * flag then says whether to skip creating the corresponding split part. */
	bool remove_first = parent->ranges[first].offset == offset;
	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
	bool unmapped_first = false;

	assert(parent->ranges[first].offset <= offset);
	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

	/* Try to merge the new range with the first range. */
	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
		size += offset - parent->ranges[first].offset;
		offset = parent->ranges[first].offset;
		bo_offset = parent->ranges[first].bo_offset;
		remove_first = true;
	}

	/* Try to merge the new range with the last range. */
	if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
		remove_last = true;
	}

	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
	new_idx = first + !remove_first;

	/* Any range between first and last is going to be entirely covered by the new range, so just unmap them. */
	for (int i = first + 1; i < last; ++i)
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

	/* If the first/last range are not left alone, we unmap them and optionally
	 * map them again after modifications. Note that this implicitly does the
	 * splitting if first == last. */
	new_first = parent->ranges[first];
	new_last = parent->ranges[last];

	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
		unmapped_first = true;

		if (!remove_first) {
			new_first.size = offset - new_first.offset;
			radv_amdgpu_winsys_virtual_map(parent, &new_first);
		}
	}

	if (parent->ranges[last].offset < offset + size || remove_last) {
		if (first != last || !unmapped_first)
			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

		if (!remove_last) {
			/* The surviving tail starts at offset + size; advance its
			 * bo_offset by the same amount so it keeps mapping the same
			 * bytes of the backing BO. */
			new_last.bo_offset += offset + size - new_last.offset;
			new_last.size -= offset + size - new_last.offset;
			new_last.offset = offset + size;
			radv_amdgpu_winsys_virtual_map(parent, &new_last);
		}
	}

	/* Moves the range list after last to account for the changed number of ranges. */
	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
	        sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

	if (!remove_first)
		parent->ranges[first] = new_first;

	if (!remove_last)
		parent->ranges[new_idx + 1] = new_last;

	/* Actually set up the new range. */
	parent->ranges[new_idx].offset = offset;
	parent->ranges[new_idx].size = size;
	parent->ranges[new_idx].bo = bo;
	parent->ranges[new_idx].bo_offset = bo_offset;

	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

	parent->range_count += range_count_delta;

	result = radv_amdgpu_winsys_rebuild_bo_list(parent);
	if (result != VK_SUCCESS)
		return result;

	return VK_SUCCESS;
}

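/* Drop one reference to the buffer. On the last reference: virtual buffers
 * unmap all of their ranges and free the bookkeeping arrays, regular buffers
 * are removed from the global list (if tracked), unmapped from the VA space
 * and freed, and the VRAM/GTT accounting is updated before the VA range and
 * the wrapper struct are released.
 */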
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (p_atomic_dec_return(&bo->ref_count))
		return;
	if (bo->is_virtual) {
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
		}
		free(bo->bos);
		free(bo->ranges);
	} else {
		if (bo->ws->debug_all_bos) {
			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
			list_del(&bo->global_list_item);
			bo->ws->num_buffers--;
			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
		}
		radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
				     0, 0, AMDGPU_VA_OP_UNMAP);
		amdgpu_bo_free(bo->bo);
	}

	if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     -align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     -align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     -align64(bo->size, ws->info.gart_page_size));

	amdgpu_va_range_free(bo->va_handle);
	FREE(bo);
}

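/* When the winsys runs with debug_all_bos, keep every buffer on a global
 * list (guarded by global_bo_list_lock) so that all buffers can be
 * referenced later, e.g. when building full BO lists for submissions.
 */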
static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (bo->ws->debug_all_bos) {
		pthread_mutex_lock(&ws->global_bo_list_lock);
		list_addtail(&bo->global_list_item, &ws->global_bo_list);
		ws->num_buffers++;
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	}
}

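/* Allocate a new buffer: reserve a GPU VA range, then either set up a single
 * unbacked range covering the whole buffer (RADEON_FLAG_VIRTUAL), or allocate
 * backing memory with the requested domains/flags, map it at the reserved VA
 * and account it against the VRAM/GTT counters.
 */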
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
			     uint64_t size,
			     unsigned alignment,
			     enum radeon_bo_domain initial_domain,
			     unsigned flags,
			     unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	struct amdgpu_bo_alloc_request request = {0};
	struct radv_amdgpu_map_range *ranges = NULL;
	amdgpu_bo_handle buf_handle;
	uint64_t va = 0;
	amdgpu_va_handle va_handle;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo) {
		return NULL;
	}

	unsigned virt_alignment = alignment;
	if (size >= ws->info.pte_fragment_size)
		virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, virt_alignment, 0, &va, &va_handle,
				  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
				   AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_va_alloc;

	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ws = ws;
	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
	bo->ref_count = 1;

	if (flags & RADEON_FLAG_VIRTUAL) {
		ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
		if (!ranges)
			goto error_ranges_alloc;

		bo->ranges = ranges;
		bo->range_count = 1;
		bo->range_capacity = 1;

		bo->ranges[0].offset = 0;
		bo->ranges[0].size = size;
		bo->ranges[0].bo = NULL;
		bo->ranges[0].bo_offset = 0;

		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
		return (struct radeon_winsys_bo *)bo;
	}

	request.alloc_size = size;
	request.phys_alignment = alignment;

	if (initial_domain & RADEON_DOMAIN_VRAM) {
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

		/* Since VRAM and GTT have almost the same performance on
		 * APUs, we could just set GTT. However, in order to decrease
		 * GTT (RAM) usage, which is shared with the OS, allow VRAM
		 * placements too. The idea is not that VRAM is particularly
		 * useful here, but that using it keeps it from sitting
		 * unused and wasted.
		 */
		if (!ws->info.has_dedicated_vram)
			request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
	}

	if (initial_domain & RADEON_DOMAIN_GTT)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
	if (initial_domain & RADEON_DOMAIN_GDS)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
	if (initial_domain & RADEON_DOMAIN_OA)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

	if (flags & RADEON_FLAG_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
		bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	}
	if (flags & RADEON_FLAG_GTT_WC)
		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
		request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
	if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
	    ws->info.has_local_buffers &&
	    (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
		bo->base.is_local = true;
		request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
	}

	/* This won't do anything on pre-4.9 kernels. */
	if (initial_domain & RADEON_DOMAIN_VRAM) {
		if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
			request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
	}

	r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
		fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
		fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
		fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
		goto error_bo_alloc;
	}

	r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
				 AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->bo = buf_handle;
	bo->initial_domain = initial_domain;
	bo->is_shared = false;
	bo->priority = priority;

	r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (initial_domain & RADEON_DOMAIN_VRAM) {
		/* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
		 * aren't mappable and are counted as part of the VRAM
		 * counter.
		 *
		 * Otherwise, buffers with the CPU_ACCESS flag or with neither
		 * flag (imported buffers) are counted as part of the visible
		 * VRAM counter because they can be mapped.
		 */
		if (bo->base.vram_no_cpu_access) {
			p_atomic_add(&ws->allocated_vram,
				     align64(bo->size, ws->info.gart_page_size));
		} else {
			p_atomic_add(&ws->allocated_vram_vis,
				     align64(bo->size, ws->info.gart_page_size));
		}
	}

	if (initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_bo_free(buf_handle);

error_bo_alloc:
	free(ranges);

error_ranges_alloc:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	FREE(bo);
	return NULL;
}

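/* CPU map/unmap simply forward to libdrm_amdgpu; mapping returns NULL on
 * failure.
 */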
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	int ret;
	void *data;
	ret = amdgpu_bo_cpu_map(bo->bo, &data);
	if (ret)
		return NULL;
	return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	amdgpu_bo_cpu_unmap(bo->bo);
}

static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
				     uint64_t size, unsigned alignment)
{
	uint64_t vm_alignment = alignment;

	/* Increase the VM alignment for faster address translation. */
	if (size >= ws->info.pte_fragment_size)
		vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

	/* Gfx9: Increase the VM alignment to the most significant bit set
	 * in the size for faster address translation.
	 */
	if (ws->info.chip_class >= GFX9) {
		unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
		uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

		vm_alignment = MAX2(vm_alignment, msb_alignment);
	}
	return vm_alignment;
}

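/* Import host memory as a GTT buffer through a userptr BO. The pointer and
 * size must stay valid for the lifetime of the returned buffer.
 */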
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
                               void *pointer,
                               uint64_t size,
                               unsigned priority)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	amdgpu_bo_handle buf_handle;
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	uint64_t vm_alignment;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
		goto error;

	/* Using the optimal VM alignment also fixes GPU hangs for buffers that
	 * are imported.
	 */
	vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
							    ws->info.gart_page_size);

	if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
	                          size, vm_alignment, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH))
		goto error_va_alloc;

	if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
		goto error_va_map;

	/* Initialize it */
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ref_count = 1;
	bo->ws = ws;
	bo->bo = buf_handle;
	bo->initial_domain = RADEON_DOMAIN_GTT;
	bo->priority = priority;

	ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	p_atomic_add(&ws->allocated_gtt,
		     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;

error_va_map:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	amdgpu_bo_free(buf_handle);

error:
	FREE(bo);
	return NULL;
}

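/* Import a buffer shared through a dma-buf fd, map it into our VA space and
 * pick up its preferred domains for the memory accounting. The optional
 * alloc_size out-parameter reports the kernel's allocation size.
 */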
static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
			      int fd, unsigned priority,
			      uint64_t *alloc_size)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	enum radeon_bo_domain initial = 0;
	int r;
	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	r = amdgpu_bo_import(ws->dev, type, fd, &result);
	if (r)
		goto error;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	if (r)
		goto error_query;

	if (alloc_size) {
		*alloc_size = info.alloc_size;
	}

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  result.alloc_size, 1 << 20, 0, &va, &va_handle,
				  AMDGPU_VA_RANGE_HIGH);
	if (r)
		goto error_query;

	r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
				 va, 0, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		initial |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		initial |= RADEON_DOMAIN_GTT;

	bo->bo = result.buf_handle;
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->initial_domain = initial;
	bo->size = result.alloc_size;
	bo->is_shared = true;
	bo->ws = ws;
	bo->priority = priority;
	bo->ref_count = 1;

	r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
	assert(!r);

	if (bo->initial_domain & RADEON_DOMAIN_VRAM)
		p_atomic_add(&ws->allocated_vram,
			     align64(bo->size, ws->info.gart_page_size));
	if (bo->initial_domain & RADEON_DOMAIN_GTT)
		p_atomic_add(&ws->allocated_gtt,
			     align64(bo->size, ws->info.gart_page_size));

	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_va_range_free(va_handle);

error_query:
	amdgpu_bo_free(result.buf_handle);

error:
	FREE(bo);
	return NULL;
}

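/* Export the buffer as a dma-buf fd so it can be shared with other processes
 * or APIs, and mark it as shared.
 */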
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
			  struct radeon_winsys_bo *_bo,
			  int *fd)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	int r;
	unsigned handle;
	r = amdgpu_bo_export(bo->bo, type, &handle);
	if (r)
		return false;

	*fd = (int)handle;
	bo->is_shared = true;
	return true;
}

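/* Query the domains and creation flags of a dma-buf fd without keeping a
 * reference: the buffer is imported temporarily, queried, and freed again.
 */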
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
                                 enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	int r;

	*domains = 0;
	*flags = 0;

	r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
	if (r)
		return false;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	amdgpu_bo_free(result.buf_handle);
	if (r)
		return false;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		*domains |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		*domains |= RADEON_DOMAIN_GTT;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
		*domains |= RADEON_DOMAIN_GDS;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
		*domains |= RADEON_DOMAIN_OA;

	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
		*flags |= RADEON_FLAG_CPU_ACCESS;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		*flags |= RADEON_FLAG_NO_CPU_ACCESS;
	if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
		*flags |= RADEON_FLAG_IMPLICIT_SYNC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		*flags |= RADEON_FLAG_GTT_WC;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
		*flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
	if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
		*flags |= RADEON_FLAG_ZERO_VRAM;
	return true;
}

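/* Convert the TILE_SPLIT tiling field to a split size in bytes, and back
 * (radv_eg_tile_split_rev). Used by the legacy (pre-GFX9) metadata paths
 * below.
 */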
static unsigned eg_tile_split(unsigned tile_split)
{
	switch (tile_split) {
	case 0:     tile_split = 64;    break;
	case 1:     tile_split = 128;   break;
	case 2:     tile_split = 256;   break;
	case 3:     tile_split = 512;   break;
	default:
	case 4:     tile_split = 1024;  break;
	case 5:     tile_split = 2048;  break;
	case 6:     tile_split = 4096;  break;
	}
	return tile_split;
}

static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
	switch (eg_tile_split) {
	case 64:    return 0;
	case 128:   return 1;
	case 256:   return 2;
	case 512:   return 3;
	default:
	case 1024:  return 4;
	case 2048:  return 5;
	case 4096:  return 6;
	}
}

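/* Pack the image tiling description into the AMDGPU_TILING_* bits and store
 * it, together with the opaque UMD metadata blob, in the kernel BO metadata
 * so that importers of the buffer can reconstruct the layout.
 */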
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_metadata metadata = {0};
	uint64_t tiling_flags = 0;

	if (bo->ws->info.chip_class >= GFX9) {
		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
		tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
	} else {
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
		else
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
		if (md->u.legacy.tile_split)
			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);

		if (md->u.legacy.scanout)
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
		else
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
	}

	metadata.tiling_info = tiling_flags;
	metadata.size_metadata = md->size_metadata;
	memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

	amdgpu_bo_set_metadata(bo->bo, &metadata);
}

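/* Inverse of radv_amdgpu_winsys_bo_set_metadata(): query the kernel BO
 * metadata and unpack the tiling bits back into struct radeon_bo_metadata.
 */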
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_info info = {0};

	int r = amdgpu_bo_query_info(bo->bo, &info);
	if (r)
		return;

	uint64_t tiling_flags = info.metadata.tiling_info;

	if (bo->ws->info.chip_class >= GFX9) {
		md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
		md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
	} else {
		md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
		md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

		if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
			md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
		else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
			md->u.legacy.microtile = RADEON_LAYOUT_TILED;

		md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
		md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
		md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
		md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
		md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
		md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
		md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
	}

	md->size_metadata = info.metadata.size_metadata;
	memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

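/* Plug the buffer entry points implemented above into the winsys vtable. */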
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
	ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
	ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
	ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
	ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
	ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
	ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
}