/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dirent.h"

#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "compiler/glsl_types.h"
#include "util/driconf.h"

#if DETECT_OS_FREEBSD
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device,
					 struct radv_timeline *timeline,
					 uint64_t p);

static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device,
			       struct radv_timeline *timeline,
			       uint64_t p);

static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
				     struct list_head *processing_list);

static
void radv_destroy_semaphore_part(struct radv_device *device,
				 struct radv_semaphore_part *part);

static VkResult
radv_create_pthread_cond(pthread_cond_t *cond);

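/* Returns the current CLOCK_MONOTONIC time in nanoseconds. */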
uint64_t radv_get_current_time(void)
{
	struct timespec tv;
	clock_gettime(CLOCK_MONOTONIC, &tv);
	return tv.tv_nsec + tv.tv_sec*1000000000ull;
}

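/* Converts a relative timeout into an absolute one, clamping the result
 * at UINT64_MAX to avoid overflow.
 */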
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time = radv_get_current_time();

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}

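/* Derives the pipeline cache UUID by hashing the driver build (via the
 * function identifiers of this function and of LLVM), the GPU family and
 * the pointer size, so the on-disk cache is invalidated whenever any of
 * them changes.
 */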
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
	struct mesa_sha1 ctx;
	unsigned char sha1[20];
	unsigned ptr_size = sizeof(void*);

	memset(uuid, 0, VK_UUID_SIZE);
	_mesa_sha1_init(&ctx);

	if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
	    !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
		return -1;

	_mesa_sha1_update(&ctx, &family, sizeof(family));
	_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
	_mesa_sha1_final(&ctx, sha1);

	memcpy(uuid, sha1, VK_UUID_SIZE);
	return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
	ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
	return MIN2(device->rad_info.vram_size, device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
	return device->rad_info.vram_size - radv_get_visible_vram_size(device);
}

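/* Translates the winsys VRAM/GART sizes into the advertised Vulkan memory
 * heaps (CPU-invisible VRAM, GART, CPU-visible VRAM) and memory types,
 * including the AMD device-coherent/uncached variants on GPUs with
 * L2-uncached support.
 */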
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
	uint64_t vram_size = radv_get_vram_size(device);
	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
	device->memory_properties.memoryHeapCount = 0;
	if (vram_size > 0) {
		vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
			.size = vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	if (device->rad_info.gart_size > 0) {
		gart_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
			.size = device->rad_info.gart_size,
			.flags = 0,
		};
	}

	if (visible_vram_size) {
		visible_vram_index = device->memory_properties.memoryHeapCount++;
		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
			.size = visible_vram_size,
			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
		};
	}

	unsigned type_count = 0;

	if (vram_index >= 0 || visible_vram_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
		device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
			.heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
		};
	}

	if (gart_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
		device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = gart_index,
		};
	}
	if (visible_vram_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
		device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
			.heapIndex = visible_vram_index,
		};
	}

	if (gart_index >= 0) {
		device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
		device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
			.heapIndex = gart_index,
		};
	}
	device->memory_properties.memoryTypeCount = type_count;

	if (device->rad_info.has_l2_uncached) {
		for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
			VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

			if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
						       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
			    mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

				VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
					VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
					VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

				device->memory_domains[type_count] = device->memory_domains[i];
				device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
				device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
					.propertyFlags = property_flags,
					.heapIndex = mem_type.heapIndex,
				};
			}
		}
		device->memory_properties.memoryTypeCount = type_count;
	}
}

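/* Returns the compiler backend string that is reported as part of the
 * device name (ACO or LLVM).
 */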
static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
	if (!pdevice->use_llvm) {
		/* Some games like SotTR apply shader workarounds if the LLVM
		 * version is too old or if the LLVM version string is
		 * missing. This gives a 2-5% performance gain with SotTR
		 * and ACO.
		 */
		if (driQueryOptionb(&pdevice->instance->dri_options,
				    "radv_report_llvm9_version_string")) {
			return "ACO/LLVM 9.0.1";
		}

		return "ACO";
	}

	return "LLVM " MESA_LLVM_VERSION_STRING;
}

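/* Opens the DRM render node (when one is given), creates the winsys and
 * fills in all static properties of the physical device. Returns
 * VK_ERROR_INCOMPATIBLE_DRIVER for non-amdgpu devices so that enumeration
 * can skip them.
 */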
static VkResult
radv_physical_device_try_create(struct radv_instance *instance,
				drmDevicePtr drm_device,
				struct radv_physical_device **device_out)
{
	VkResult result;
	int fd = -1;
	int master_fd = -1;

	if (drm_device) {
		const char *path = drm_device->nodes[DRM_NODE_RENDER];
		drmVersionPtr version;

		fd = open(path, O_RDWR | O_CLOEXEC);
		if (fd < 0) {
			if (instance->debug_flags & RADV_DEBUG_STARTUP)
				radv_logi("Could not open device '%s'", path);

			return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
		}

		version = drmGetVersion(fd);
		if (!version) {
			close(fd);

			if (instance->debug_flags & RADV_DEBUG_STARTUP)
				radv_logi("Could not get the kernel driver version for device '%s'", path);

			return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
					 "failed to get version %s: %m", path);
		}

		if (strcmp(version->name, "amdgpu")) {
			drmFreeVersion(version);
			close(fd);

			if (instance->debug_flags & RADV_DEBUG_STARTUP)
				radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

			return VK_ERROR_INCOMPATIBLE_DRIVER;
		}
		drmFreeVersion(version);

		if (instance->debug_flags & RADV_DEBUG_STARTUP)
			radv_logi("Found compatible device '%s'.", path);
	}

	struct radv_physical_device *device =
		vk_zalloc2(&instance->alloc, NULL, sizeof(*device), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!device) {
		result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		goto fail_fd;
	}

	device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
	device->instance = instance;

	if (drm_device) {
		device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
						       instance->perftest_flags);
	} else {
		device->ws = radv_null_winsys_create();
	}

	if (!device->ws) {
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "failed to initialize winsys");
		goto fail_alloc;
	}

	if (drm_device && instance->enabled_extensions.KHR_display) {
		master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
		if (master_fd >= 0) {
			uint32_t accel_working = 0;
			struct drm_amdgpu_info request = {
				.return_pointer = (uintptr_t)&accel_working,
				.return_size = sizeof(accel_working),
				.query = AMDGPU_INFO_ACCEL_WORKING
			};

			if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 || !accel_working) {
				close(master_fd);
				master_fd = -1;
			}
		}
	}

	device->master_fd = master_fd;
	device->local_fd = fd;
	device->ws->query_info(device->ws, &device->rad_info);

	device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;

	snprintf(device->name, sizeof(device->name),
		 "AMD RADV %s (%s)",
		 device->rad_info.name, radv_get_compiler_string(device));

	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
		result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
				   "cannot generate UUID");
		goto fail_wsi;
	}

	/* These flags affect shader compilation. */
	uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);

	/* The gpu id is already embedded in the uuid so we just pass "radv"
	 * when creating the cache.
	 */
	char buf[VK_UUID_SIZE * 2 + 1];
	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

	if (device->rad_info.chip_class < GFX8)
		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

	radv_get_driver_uuid(&device->driver_uuid);
	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

	device->out_of_order_rast_allowed = device->rad_info.has_out_of_order_rast &&
					    !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

	device->dcc_msaa_allowed =
		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

	device->use_ngg = device->rad_info.chip_class >= GFX10 &&
			  device->rad_info.family != CHIP_NAVI14 &&
			  !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);

	/* TODO: Implement NGG GS with ACO. */
	device->use_ngg_gs = device->use_ngg && device->use_llvm;
	device->use_ngg_streamout = false;

	/* Determine the number of threads per wave for all stages. */
	device->cs_wave_size = 64;
	device->ps_wave_size = 64;
	device->ge_wave_size = 64;

	if (device->rad_info.chip_class >= GFX10) {
		if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
			device->cs_wave_size = 32;

		/* For pixel shaders, wave64 is recommended. */
		if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
			device->ps_wave_size = 32;

		if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
			device->ge_wave_size = 32;
	}

	radv_physical_device_init_mem_types(device);

	radv_physical_device_get_supported_extensions(device,
						      &device->supported_extensions);

	if (drm_device)
		device->bus_info = *drm_device->businfo.pci;

	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
		ac_print_gpu_info(&device->rad_info);

	/* The WSI is structured as a layer on top of the driver, so this has
	 * to be the last part of initialization (at least until we get other
	 * semi-layers).
	 */
	result = radv_init_wsi(device);
	if (result != VK_SUCCESS) {
		vk_error(instance, result);
		goto fail_disk_cache;
	}

	*device_out = device;

	return VK_SUCCESS;

fail_disk_cache:
	disk_cache_destroy(device->disk_cache);
fail_wsi:
	device->ws->destroy(device->ws);
fail_alloc:
	vk_free(&instance->alloc, device);
fail_fd:
	if (fd != -1)
		close(fd);
	if (master_fd != -1)
		close(master_fd);
	return result;
}

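/* Tears a physical device down in the reverse order of
 * radv_physical_device_try_create().
 */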
static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
	radv_finish_wsi(device);
	device->ws->destroy(device->ws);
	disk_cache_destroy(device->disk_cache);
	close(device->local_fd);
	if (device->master_fd != -1)
		close(device->master_fd);
	vk_free(&device->instance->alloc, device);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
	return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
	return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
	free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
	.pUserData = NULL,
	.pfnAllocation = default_alloc_func,
	.pfnReallocation = default_realloc_func,
	.pfnFree = default_free_func,
};

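/* Debug flags parsed from the comma-separated RADV_DEBUG environment
 * variable, e.g. RADV_DEBUG=startup,info,nocache.
 */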
static const struct debug_control radv_debug_options[] = {
	{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
	{"nodcc", RADV_DEBUG_NO_DCC},
	{"shaders", RADV_DEBUG_DUMP_SHADERS},
	{"nocache", RADV_DEBUG_NO_CACHE},
	{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
	{"nohiz", RADV_DEBUG_NO_HIZ},
	{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
	{"allbos", RADV_DEBUG_ALL_BOS},
	{"noibs", RADV_DEBUG_NO_IBS},
	{"spirv", RADV_DEBUG_DUMP_SPIRV},
	{"vmfaults", RADV_DEBUG_VM_FAULTS},
	{"zerovram", RADV_DEBUG_ZERO_VRAM},
	{"syncshaders", RADV_DEBUG_SYNC_SHADERS},
	{"preoptir", RADV_DEBUG_PREOPTIR},
	{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
	{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
	{"info", RADV_DEBUG_INFO},
	{"errors", RADV_DEBUG_ERRORS},
	{"startup", RADV_DEBUG_STARTUP},
	{"checkir", RADV_DEBUG_CHECKIR},
	{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
	{"nobinning", RADV_DEBUG_NOBINNING},
	{"nongg", RADV_DEBUG_NO_NGG},
	{"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
	{"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
	{"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
	{"llvm", RADV_DEBUG_LLVM},
	{NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_debug_options) - 1);
	return radv_debug_options[id].string;
}

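/* Experimental features parsed from the comma-separated RADV_PERFTEST
 * environment variable, e.g. RADV_PERFTEST=cswave32,dccmsaa.
 */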
static const struct debug_control radv_perftest_options[] = {
	{"localbos", RADV_PERFTEST_LOCAL_BOS},
	{"dccmsaa", RADV_PERFTEST_DCC_MSAA},
	{"bolist", RADV_PERFTEST_BO_LIST},
	{"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
	{"cswave32", RADV_PERFTEST_CS_WAVE_32},
	{"pswave32", RADV_PERFTEST_PS_WAVE_32},
	{"gewave32", RADV_PERFTEST_GE_WAVE_32},
	{"dfsm", RADV_PERFTEST_DFSM},
	{NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
	assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
	return radv_perftest_options[id].string;
}

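/* Applies per-application and per-engine driver workarounds based on the
 * names reported in VkApplicationInfo.
 */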
static void
radv_handle_per_app_options(struct radv_instance *instance,
			    const VkApplicationInfo *info)
{
	const char *name = info ? info->pApplicationName : NULL;
	const char *engine_name = info ? info->pEngineName : NULL;

	if (name) {
		if (!strcmp(name, "DOOM_VFR")) {
			/* Work around a Doom VFR game bug */
			instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
		} else if (!strcmp(name, "Fledge")) {
			/*
			 * Zero VRAM for "The Surge 2"
			 *
			 * This avoids a hang when rendering any level. Likely
			 * uninitialized data in an indirect draw.
			 */
			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
		} else if (!strcmp(name, "No Man's Sky")) {
			/* Work around a NMS game bug */
			instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
		} else if (!strcmp(name, "DOOMEternal")) {
			/* Zero VRAM for Doom Eternal to fix rendering issues. */
			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
		} else if (!strcmp(name, "Red Dead Redemption 2")) {
			/* Work around a RDR2 game bug */
			instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
		}
	}

	if (engine_name) {
		if (!strcmp(engine_name, "vkd3d")) {
			/* Zero VRAM for all VKD3D (DX12->VK) games to fix
			 * rendering issues.
			 */
			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
		} else if (!strcmp(engine_name, "Quantic Dream Engine")) {
			/* Fix various artifacts in Detroit: Become Human */
			instance->debug_flags |= RADV_DEBUG_ZERO_VRAM |
			                         RADV_DEBUG_DISCARD_TO_DEMOTE;
		}
	}

	instance->enable_mrt_output_nan_fixup =
		driQueryOptionb(&instance->dri_options,
				"radv_enable_mrt_output_nan_fixup");

	if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
}

static const char radv_dri_options_xml[] =
DRI_CONF_BEGIN
	DRI_CONF_SECTION_PERFORMANCE
		DRI_CONF_ADAPTIVE_SYNC("true")
		DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
		DRI_CONF_VK_X11_STRICT_IMAGE_COUNT("false")
		DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT("false")
		DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING("false")
		DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP("false")
		DRI_CONF_RADV_NO_DYNAMIC_BOUNDS("false")
		DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
	DRI_CONF_SECTION_END

	DRI_CONF_SECTION_DEBUG
		DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST("false")
	DRI_CONF_SECTION_END
DRI_CONF_END;

static void radv_init_dri_options(struct radv_instance *instance)
{
	driParseOptionInfo(&instance->available_dri_options, radv_dri_options_xml);
	driParseConfigFiles(&instance->dri_options,
	                    &instance->available_dri_options,
	                    0, "radv", NULL,
	                    instance->applicationName,
	                    instance->applicationVersion,
	                    instance->engineName,
	                    instance->engineVersion);
}

VkResult radv_CreateInstance(
	const VkInstanceCreateInfo*                 pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkInstance*                                 pInstance)
{
	struct radv_instance *instance;
	VkResult result;

	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
			      VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
	if (!instance)
		return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(NULL, &instance->base, VK_OBJECT_TYPE_INSTANCE);

	if (pAllocator)
		instance->alloc = *pAllocator;
	else
		instance->alloc = default_alloc;

	if (pCreateInfo->pApplicationInfo) {
		const VkApplicationInfo *app = pCreateInfo->pApplicationInfo;

		instance->applicationName =
			vk_strdup(&instance->alloc, app->pApplicationName,
				  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
		instance->applicationVersion = app->applicationVersion;

		instance->engineName =
			vk_strdup(&instance->alloc, app->pEngineName,
				  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
		instance->engineVersion = app->engineVersion;
		instance->apiVersion = app->apiVersion;
	}

	if (instance->apiVersion == 0)
		instance->apiVersion = VK_API_VERSION_1_0;

	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
						   radv_debug_options);

	const char *radv_perftest_str = getenv("RADV_PERFTEST");
	instance->perftest_flags = parse_debug_string(radv_perftest_str,
						      radv_perftest_options);

	if (radv_perftest_str) {
		/* Output warnings for famous RADV_PERFTEST options that no
		 * longer exist or are deprecated.
		 */
		if (strstr(radv_perftest_str, "aco")) {
			fprintf(stderr, "*******************************************************************************\n");
			fprintf(stderr, "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
			fprintf(stderr, "*******************************************************************************\n");
		}
		if (strstr(radv_perftest_str, "llvm")) {
			fprintf(stderr, "*********************************************************************************\n");
			fprintf(stderr, "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
			fprintf(stderr, "*********************************************************************************\n");
			abort();
		}
	}

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Created an instance");

	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
		int idx;
		for (idx = 0; idx < RADV_INSTANCE_EXTENSION_COUNT; idx++) {
			if (!strcmp(pCreateInfo->ppEnabledExtensionNames[i],
				    radv_instance_extensions[idx].extensionName))
				break;
		}

		if (idx >= RADV_INSTANCE_EXTENSION_COUNT ||
		    !radv_instance_extensions_supported.extensions[idx]) {
			vk_object_base_finish(&instance->base);
			vk_free2(&default_alloc, pAllocator, instance);
			return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
		}

		instance->enabled_extensions.extensions[idx] = true;
	}

	bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;

	for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
		/* Vulkan requires that entrypoints for extensions which have
		 * not been enabled must not be advertised.
		 */
		if (!unchecked &&
		    !radv_instance_entrypoint_is_enabled(i, instance->apiVersion,
							 &instance->enabled_extensions)) {
			instance->dispatch.entrypoints[i] = NULL;
		} else {
			instance->dispatch.entrypoints[i] =
				radv_instance_dispatch_table.entrypoints[i];
		}
	}

	for (unsigned i = 0; i < ARRAY_SIZE(instance->physical_device_dispatch.entrypoints); i++) {
		/* Vulkan requires that entrypoints for extensions which have
		 * not been enabled must not be advertised.
		 */
		if (!unchecked &&
		    !radv_physical_device_entrypoint_is_enabled(i, instance->apiVersion,
								&instance->enabled_extensions)) {
			instance->physical_device_dispatch.entrypoints[i] = NULL;
		} else {
			instance->physical_device_dispatch.entrypoints[i] =
				radv_physical_device_dispatch_table.entrypoints[i];
		}
	}

	for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) {
		/* Vulkan requires that entrypoints for extensions which have
		 * not been enabled must not be advertised.
		 */
		if (!unchecked &&
		    !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
						       &instance->enabled_extensions, NULL)) {
			instance->device_dispatch.entrypoints[i] = NULL;
		} else {
			instance->device_dispatch.entrypoints[i] =
				radv_device_dispatch_table.entrypoints[i];
		}
	}

	instance->physical_devices_enumerated = false;
	list_inithead(&instance->physical_devices);

	result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
	if (result != VK_SUCCESS) {
		vk_object_base_finish(&instance->base);
		vk_free2(&default_alloc, pAllocator, instance);
		return vk_error(instance, result);
	}

	glsl_type_singleton_init_or_ref();

	VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

	radv_init_dri_options(instance);
	radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

	*pInstance = radv_instance_to_handle(instance);

	return VK_SUCCESS;
}

void radv_DestroyInstance(
	VkInstance                                  _instance,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);

	if (!instance)
		return;

	list_for_each_entry_safe(struct radv_physical_device, pdevice,
				 &instance->physical_devices, link) {
		radv_physical_device_destroy(pdevice);
	}

	vk_free(&instance->alloc, instance->engineName);
	vk_free(&instance->alloc, instance->applicationName);

	VG(VALGRIND_DESTROY_MEMPOOL(instance));

	glsl_type_singleton_decref();

	driDestroyOptionCache(&instance->dri_options);
	driDestroyOptionInfo(&instance->available_dri_options);

	vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

	vk_object_base_finish(&instance->base);
	vk_free(&instance->alloc, instance);
}

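/* Enumerates the available physical devices once per instance. When
 * RADV_FORCE_FAMILY is set, a null-winsys device is created instead so
 * the compiler can be exercised without hardware.
 */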
static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
	if (instance->physical_devices_enumerated)
		return VK_SUCCESS;

	instance->physical_devices_enumerated = true;

	/* TODO: Check for more devices? */
	drmDevicePtr devices[8];
	VkResult result = VK_SUCCESS;
	int max_devices;

	if (getenv("RADV_FORCE_FAMILY")) {
		/* When RADV_FORCE_FAMILY is set, the driver creates a null
		 * device that allows testing the compiler without having an
		 * AMDGPU instance.
		 */
		struct radv_physical_device *pdevice;

		result = radv_physical_device_try_create(instance, NULL, &pdevice);
		if (result != VK_SUCCESS)
			return result;

		list_addtail(&pdevice->link, &instance->physical_devices);
		return VK_SUCCESS;
	}

	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

	if (instance->debug_flags & RADV_DEBUG_STARTUP)
		radv_logi("Found %d drm nodes", max_devices);

	if (max_devices < 1)
		return vk_error(instance, VK_SUCCESS);

	for (unsigned i = 0; i < (unsigned)max_devices; i++) {
		if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
		    devices[i]->bustype == DRM_BUS_PCI &&
		    devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

			struct radv_physical_device *pdevice;
			result = radv_physical_device_try_create(instance, devices[i],
								 &pdevice);
			/* Incompatible DRM device, skip. */
			if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
				result = VK_SUCCESS;
				continue;
			}

			/* Error creating the physical device, report the error. */
			if (result != VK_SUCCESS)
				break;

			list_addtail(&pdevice->link, &instance->physical_devices);
		}
	}
	drmFreeDevices(devices, max_devices);

	/* If we successfully enumerated any devices, call it success */
	return result;
}

VkResult radv_EnumeratePhysicalDevices(
	VkInstance                                  _instance,
	uint32_t*                                   pPhysicalDeviceCount,
	VkPhysicalDevice*                           pPhysicalDevices)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);

	VkResult result = radv_enumerate_physical_devices(instance);
	if (result != VK_SUCCESS)
		return result;

	list_for_each_entry(struct radv_physical_device, pdevice,
			    &instance->physical_devices, link) {
		vk_outarray_append(&out, i) {
			*i = radv_physical_device_to_handle(pdevice);
		}
	}

	return vk_outarray_status(&out);
}

VkResult radv_EnumeratePhysicalDeviceGroups(
    VkInstance                                  _instance,
    uint32_t*                                   pPhysicalDeviceGroupCount,
    VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
			      pPhysicalDeviceGroupCount);

	VkResult result = radv_enumerate_physical_devices(instance);
	if (result != VK_SUCCESS)
		return result;

	list_for_each_entry(struct radv_physical_device, pdevice,
			    &instance->physical_devices, link) {
		vk_outarray_append(&out, p) {
			p->physicalDeviceCount = 1;
			memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
			p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
			p->subsetAllocation = false;
		}
	}

	return vk_outarray_status(&out);
}

void radv_GetPhysicalDeviceFeatures(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures*                   pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	memset(pFeatures, 0, sizeof(*pFeatures));

	*pFeatures = (VkPhysicalDeviceFeatures) {
		.robustBufferAccess                       = true,
		.fullDrawIndexUint32                      = true,
		.imageCubeArray                           = true,
		.independentBlend                         = true,
		.geometryShader                           = true,
		.tessellationShader                       = true,
		.sampleRateShading                        = true,
		.dualSrcBlend                             = true,
		.logicOp                                  = true,
		.multiDrawIndirect                        = true,
		.drawIndirectFirstInstance                = true,
		.depthClamp                               = true,
		.depthBiasClamp                           = true,
		.fillModeNonSolid                         = true,
		.depthBounds                              = true,
		.wideLines                                = true,
		.largePoints                              = true,
		.alphaToOne                               = true,
		.multiViewport                            = true,
		.samplerAnisotropy                        = true,
		.textureCompressionETC2                   = radv_device_supports_etc(pdevice),
		.textureCompressionASTC_LDR               = false,
		.textureCompressionBC                     = true,
		.occlusionQueryPrecise                    = true,
		.pipelineStatisticsQuery                  = true,
		.vertexPipelineStoresAndAtomics           = true,
		.fragmentStoresAndAtomics                 = true,
		.shaderTessellationAndGeometryPointSize   = true,
		.shaderImageGatherExtended                = true,
		.shaderStorageImageExtendedFormats        = true,
		.shaderStorageImageMultisample            = true,
		.shaderUniformBufferArrayDynamicIndexing  = true,
		.shaderSampledImageArrayDynamicIndexing   = true,
		.shaderStorageBufferArrayDynamicIndexing  = true,
		.shaderStorageImageArrayDynamicIndexing   = true,
		.shaderStorageImageReadWithoutFormat      = true,
		.shaderStorageImageWriteWithoutFormat     = true,
		.shaderClipDistance                       = true,
		.shaderCullDistance                       = true,
		.shaderFloat64                            = true,
		.shaderInt64                              = true,
		.shaderInt16                              = true,
		.sparseBinding                            = true,
		.variableMultisampleRate                  = true,
		.shaderResourceMinLod                     = true,
		.inheritedQueries                         = true,
	};
}

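/* Vulkan 1.1 core features; also the source of truth for the matching
 * per-extension feature structs handled in radv_GetPhysicalDeviceFeatures2().
 */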
static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
				      VkPhysicalDeviceVulkan11Features *f)
{
	assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

	f->storageBuffer16BitAccess            = true;
	f->uniformAndStorageBuffer16BitAccess  = true;
	f->storagePushConstant16               = true;
	f->storageInputOutput16                = pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
	f->multiview                           = true;
	f->multiviewGeometryShader             = true;
	f->multiviewTessellationShader         = true;
	f->variablePointersStorageBuffer       = true;
	f->variablePointers                    = true;
	f->protectedMemory                     = false;
	f->samplerYcbcrConversion              = true;
	f->shaderDrawParameters                = true;
}

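/* Vulkan 1.2 core features; also the source of truth for the matching
 * per-extension feature structs handled in radv_GetPhysicalDeviceFeatures2().
 */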
static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
				      VkPhysicalDeviceVulkan12Features *f)
{
	assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

	f->samplerMirrorClampToEdge = true;
	f->drawIndirectCount = true;
	f->storageBuffer8BitAccess = true;
	f->uniformAndStorageBuffer8BitAccess = true;
	f->storagePushConstant8 = true;
	f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
	f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
	f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
	f->shaderInt8 = true;

	f->descriptorIndexing = true;
	f->shaderInputAttachmentArrayDynamicIndexing = true;
	f->shaderUniformTexelBufferArrayDynamicIndexing = true;
	f->shaderStorageTexelBufferArrayDynamicIndexing = true;
	f->shaderUniformBufferArrayNonUniformIndexing = true;
	f->shaderSampledImageArrayNonUniformIndexing = true;
	f->shaderStorageBufferArrayNonUniformIndexing = true;
	f->shaderStorageImageArrayNonUniformIndexing = true;
	f->shaderInputAttachmentArrayNonUniformIndexing = true;
	f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
	f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
	f->descriptorBindingUniformBufferUpdateAfterBind = true;
	f->descriptorBindingSampledImageUpdateAfterBind = true;
	f->descriptorBindingStorageImageUpdateAfterBind = true;
	f->descriptorBindingStorageBufferUpdateAfterBind = true;
	f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
	f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
	f->descriptorBindingUpdateUnusedWhilePending = true;
	f->descriptorBindingPartiallyBound = true;
	f->descriptorBindingVariableDescriptorCount = true;
	f->runtimeDescriptorArray = true;

	f->samplerFilterMinmax = true;
	f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
	f->imagelessFramebuffer = true;
	f->uniformBufferStandardLayout = true;
	f->shaderSubgroupExtendedTypes = true;
	f->separateDepthStencilLayouts = true;
	f->hostQueryReset = true;
	f->timelineSemaphore = pdevice->rad_info.has_syncobj_wait_for_submit;
	f->bufferDeviceAddress = true;
	f->bufferDeviceAddressCaptureReplay = false;
	f->bufferDeviceAddressMultiDevice = false;
	f->vulkanMemoryModel = true;
	f->vulkanMemoryModelDeviceScope = true;
	f->vulkanMemoryModelAvailabilityVisibilityChains = false;
	f->shaderOutputViewportIndex = true;
	f->shaderOutputLayer = true;
	f->subgroupBroadcastDynamicId = true;
}

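/* Fills the pNext chain of VkPhysicalDeviceFeatures2. Extension structs
 * that alias core features are answered from the 1.1/1.2 structs above
 * via CORE_FEATURE().
 */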
void radv_GetPhysicalDeviceFeatures2(
	VkPhysicalDevice                            physicalDevice,
	VkPhysicalDeviceFeatures2                  *pFeatures)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

	VkPhysicalDeviceVulkan11Features core_1_1 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
	};
	radv_get_physical_device_features_1_1(pdevice, &core_1_1);

	VkPhysicalDeviceVulkan12Features core_1_2 = {
		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
	};
	radv_get_physical_device_features_1_2(pdevice, &core_1_2);

#define CORE_FEATURE(major, minor, feature) \
   features->feature = core_##major##_##minor.feature

	vk_foreach_struct(ext, pFeatures->pNext) {
		switch (ext->sType) {
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
			VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
			CORE_FEATURE(1, 1, variablePointersStorageBuffer);
			CORE_FEATURE(1, 1, variablePointers);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
			VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
			CORE_FEATURE(1, 1, multiview);
			CORE_FEATURE(1, 1, multiviewGeometryShader);
			CORE_FEATURE(1, 1, multiviewTessellationShader);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
			VkPhysicalDeviceShaderDrawParametersFeatures *features =
			    (VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
			CORE_FEATURE(1, 1, shaderDrawParameters);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
			VkPhysicalDeviceProtectedMemoryFeatures *features =
			    (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
			CORE_FEATURE(1, 1, protectedMemory);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
			VkPhysicalDevice16BitStorageFeatures *features =
			    (VkPhysicalDevice16BitStorageFeatures*)ext;
			CORE_FEATURE(1, 1, storageBuffer16BitAccess);
			CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
			CORE_FEATURE(1, 1, storagePushConstant16);
			CORE_FEATURE(1, 1, storageInputOutput16);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
			VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
			    (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
			CORE_FEATURE(1, 1, samplerYcbcrConversion);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
			VkPhysicalDeviceDescriptorIndexingFeatures *features =
				(VkPhysicalDeviceDescriptorIndexingFeatures*)ext;
			CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
			CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
			CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
			CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
			CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
			CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
			CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
			CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
			CORE_FEATURE(1, 2, runtimeDescriptorArray);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
			VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
				(VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
			features->conditionalRendering = true;
			features->inheritedConditionalRendering = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
			VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
				(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
			features->vertexAttributeInstanceRateDivisor = true;
			features->vertexAttributeInstanceRateZeroDivisor = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
			features->transformFeedback = true;
			features->geometryStreams = !pdevice->use_ngg_streamout;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
			VkPhysicalDeviceScalarBlockLayoutFeatures *features =
				(VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
			CORE_FEATURE(1, 2, scalarBlockLayout);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
			VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
				(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
			features->memoryPriority = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
			VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
				(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
			features->bufferDeviceAddress = true;
			features->bufferDeviceAddressCaptureReplay = false;
			features->bufferDeviceAddressMultiDevice = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
			VkPhysicalDeviceBufferDeviceAddressFeatures *features =
				(VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
			CORE_FEATURE(1, 2, bufferDeviceAddress);
			CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
			CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
			features->depthClipEnable = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
			VkPhysicalDeviceHostQueryResetFeatures *features =
				(VkPhysicalDeviceHostQueryResetFeatures *)ext;
			CORE_FEATURE(1, 2, hostQueryReset);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
			VkPhysicalDevice8BitStorageFeatures *features =
			    (VkPhysicalDevice8BitStorageFeatures *)ext;
			CORE_FEATURE(1, 2, storageBuffer8BitAccess);
			CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
			CORE_FEATURE(1, 2, storagePushConstant8);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
			VkPhysicalDeviceShaderFloat16Int8Features *features =
				(VkPhysicalDeviceShaderFloat16Int8Features*)ext;
			CORE_FEATURE(1, 2, shaderFloat16);
			CORE_FEATURE(1, 2, shaderInt8);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
			VkPhysicalDeviceShaderAtomicInt64Features *features =
				(VkPhysicalDeviceShaderAtomicInt64Features *)ext;
			CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
			CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
			VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
				(VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
			features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
			VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
				(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

			features->inlineUniformBlock = true;
			features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
			VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
				(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
			features->computeDerivativeGroupQuads = false;
			features->computeDerivativeGroupLinear = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
			VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
				(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
			features->ycbcrImageArrays = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
			VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
				(VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
			CORE_FEATURE(1, 2, uniformBufferStandardLayout);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
			VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
				(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
			features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
			VkPhysicalDeviceImagelessFramebufferFeatures *features =
				(VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
			CORE_FEATURE(1, 2, imagelessFramebuffer);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
			VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
				(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
			features->pipelineExecutableInfo = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
			VkPhysicalDeviceShaderClockFeaturesKHR *features =
				(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
			features->shaderSubgroupClock = true;
			features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
			VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
				(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
			features->texelBufferAlignment = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
			VkPhysicalDeviceTimelineSemaphoreFeatures *features =
				(VkPhysicalDeviceTimelineSemaphoreFeatures *) ext;
			CORE_FEATURE(1, 2, timelineSemaphore);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
			VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
				(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
			features->subgroupSizeControl = true;
			features->computeFullSubgroups = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
			VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
				(VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
			features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
			VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
				(VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
			CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
			VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
				(VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
			CORE_FEATURE(1, 2, separateDepthStencilLayouts);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
			radv_get_physical_device_features_1_1(pdevice, (void *)ext);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
			radv_get_physical_device_features_1_2(pdevice, (void *)ext);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
			VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
				(VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
			features->rectangularLines = false;
			features->bresenhamLines = true;
			features->smoothLines = false;
			features->stippledRectangularLines = false;
			features->stippledBresenhamLines = true;
			features->stippledSmoothLines = false;
			break;
		}
		case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
			VkDeviceMemoryOverallocationCreateInfoAMD *features =
				(VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
			features->overallocationBehavior = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
			VkPhysicalDeviceRobustness2FeaturesEXT *features =
				(VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
			features->robustBufferAccess2 = true;
			features->robustImageAccess2 = true;
			features->nullDescriptor = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
			VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
				(VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
			features->customBorderColors = true;
			features->customBorderColorWithoutFormat = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
			VkPhysicalDevicePrivateDataFeaturesEXT *features =
				(VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
			features->privateData = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
			VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
				(VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
			features->pipelineCreationCacheControl = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
			VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
				(VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
			CORE_FEATURE(1, 2, vulkanMemoryModel);
			CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
			CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
			VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
				(VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *) ext;
			features->extendedDynamicState = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
			VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
				(VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
			features->robustImageAccess = true;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
			VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
				(VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
			features->shaderBufferFloat32Atomics = true;
			features->shaderBufferFloat32AtomicAdd = false;
			features->shaderBufferFloat64Atomics = true;
			features->shaderBufferFloat64AtomicAdd = false;
			features->shaderSharedFloat32Atomics = true;
			features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
								 (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
			features->shaderSharedFloat64Atomics = true;
			features->shaderSharedFloat64AtomicAdd = false;
			features->shaderImageFloat32Atomics = true;
			features->shaderImageFloat32AtomicAdd = false;
			features->sparseImageFloat32Atomics = false;
			features->sparseImageFloat32AtomicAdd = false;
			break;
		}
		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
			VkPhysicalDevice4444FormatsFeaturesEXT *features =
				(VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
			features->formatA4R4G4B4 = true;
			features->formatA4B4G4R4 = true;
			break;
		}
		default:
			break;
		}
	}
#undef CORE_FEATURE
}

1424 static size_t
1425 radv_max_descriptor_set_size()
1426 {
1427 	/* Make sure that the entire descriptor set is addressable with a signed
1428 	 * 32-bit int. So the sum of all limits scaled by descriptor size has to
1429 	 * be at most 2 GiB. A combined image & sampler object counts as one of
1430 	 * each. This limit is for the pipeline layout, not for the set layout, but
1431 	 * there is no set limit, so we just set a pipeline limit. No app is
1432 	 * expected to hit this any time soon. */
1433 	return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
1434 	                     - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1435 	          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1436 	           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1437 	           32 /* sampler, largest when combined with image */ +
1438 	           64 /* sampled image */ +
1439 	           64 /* storage image */);
1440 }
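/* Illustrative arithmetic only, assuming this build's values of
 * MAX_DYNAMIC_BUFFERS == 32, MAX_INLINE_UNIFORM_BLOCK_SIZE == 4096 and
 * MAX_INLINE_UNIFORM_BLOCK_COUNT == 16 (other builds may differ):
 *
 *    numerator   = 2^31 - 16*32 - 4096*16  = 2147417600
 *    denominator = 32 + 32 + 32 + 64 + 64  = 224
 *    limit       = 2147417600 / 224        ~ 9.58 million descriptors
 *
 * so every per-stage and per-set limit derived from this below is around
 * 9.5 million, far beyond what any realistic pipeline layout uses.
 */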
1441 
1442 static uint32_t
1443 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
1444 {
1445 	uint32_t uniform_offset_alignment = driQueryOptioni(&pdevice->instance->dri_options,
1446 	                                                   "radv_override_uniform_offset_alignment");
1447 	if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1448 		fprintf(stderr, "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
1449 		                "not a power of two\n", uniform_offset_alignment);
1450 		uniform_offset_alignment = 0;
1451 	}
1452 
1453 	/* Take at least the hardware limit. */
1454 	return MAX2(uniform_offset_alignment, 4);
1455 }
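/* Behaviour sketch: a driconf setting of
 * radv_override_uniform_offset_alignment=256 yields 256, while an unset
 * option (0) or a rejected non-power-of-two value falls back to the
 * hardware minimum of 4 via the MAX2() above.
 */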
1456 
1457 void radv_GetPhysicalDeviceProperties(
1458 	VkPhysicalDevice                            physicalDevice,
1459 	VkPhysicalDeviceProperties*                 pProperties)
1460 {
1461 	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1462 	VkSampleCountFlags sample_counts = 0xf;
1463 
1464 	size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1465 
1466 	VkPhysicalDeviceLimits limits = {
1467 		.maxImageDimension1D                      = (1 << 14),
1468 		.maxImageDimension2D                      = (1 << 14),
1469 		.maxImageDimension3D                      = (1 << 11),
1470 		.maxImageDimensionCube                    = (1 << 14),
1471 		.maxImageArrayLayers                      = (1 << 11),
1472 		.maxTexelBufferElements                   = UINT32_MAX,
1473 		.maxUniformBufferRange                    = UINT32_MAX,
1474 		.maxStorageBufferRange                    = UINT32_MAX,
1475 		.maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1476 		.maxMemoryAllocationCount                 = UINT32_MAX,
1477 		.maxSamplerAllocationCount                = 64 * 1024,
1478 		.bufferImageGranularity                   = 64, /* A cache line */
1479 		.sparseAddressSpaceSize                   = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1480 		.maxBoundDescriptorSets                   = MAX_SETS,
1481 		.maxPerStageDescriptorSamplers            = max_descriptor_set_size,
1482 		.maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
1483 		.maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
1484 		.maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
1485 		.maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
1486 		.maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
1487 		.maxPerStageResources                     = max_descriptor_set_size,
1488 		.maxDescriptorSetSamplers                 = max_descriptor_set_size,
1489 		.maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
1490 		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
1491 		.maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
1492 		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
1493 		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
1494 		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
1495 		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
1496 		.maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
1497 		.maxVertexInputBindings                   = MAX_VBS,
1498 		.maxVertexInputAttributeOffset            = 2047,
1499 		.maxVertexInputBindingStride              = 2048,
1500 		.maxVertexOutputComponents                = 128,
1501 		.maxTessellationGenerationLevel           = 64,
1502 		.maxTessellationPatchSize                 = 32,
1503 		.maxTessellationControlPerVertexInputComponents = 128,
1504 		.maxTessellationControlPerVertexOutputComponents = 128,
1505 		.maxTessellationControlPerPatchOutputComponents = 120,
1506 		.maxTessellationControlTotalOutputComponents = 4096,
1507 		.maxTessellationEvaluationInputComponents = 128,
1508 		.maxTessellationEvaluationOutputComponents = 128,
1509 		.maxGeometryShaderInvocations             = 127,
1510 		.maxGeometryInputComponents               = 64,
1511 		.maxGeometryOutputComponents              = 128,
1512 		.maxGeometryOutputVertices                = 256,
1513 		.maxGeometryTotalOutputComponents         = 1024,
1514 		.maxFragmentInputComponents               = 128,
1515 		.maxFragmentOutputAttachments             = 8,
1516 		.maxFragmentDualSrcAttachments            = 1,
1517 		.maxFragmentCombinedOutputResources       = 8,
1518 		.maxComputeSharedMemorySize               = 32768,
1519 		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1520 		.maxComputeWorkGroupInvocations           = 1024,
1521 		.maxComputeWorkGroupSize = {
1522 			1024,
1523 			1024,
1524 			1024
1525 		},
1526 		.subPixelPrecisionBits                    = 8,
1527 		.subTexelPrecisionBits                    = 8,
1528 		.mipmapPrecisionBits                      = 8,
1529 		.maxDrawIndexedIndexValue                 = UINT32_MAX,
1530 		.maxDrawIndirectCount                     = UINT32_MAX,
1531 		.maxSamplerLodBias                        = 16,
1532 		.maxSamplerAnisotropy                     = 16,
1533 		.maxViewports                             = MAX_VIEWPORTS,
1534 		.maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1535 		.viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1536 		.viewportSubPixelBits                     = 8,
1537 		.minMemoryMapAlignment                    = 4096, /* A page */
1538 		.minTexelBufferOffsetAlignment            = 4,
1539 		.minUniformBufferOffsetAlignment          = radv_uniform_buffer_offset_alignment(pdevice),
1540 		.minStorageBufferOffsetAlignment          = 4,
1541 		.minTexelOffset                           = -32,
1542 		.maxTexelOffset                           = 31,
1543 		.minTexelGatherOffset                     = -32,
1544 		.maxTexelGatherOffset                     = 31,
1545 		.minInterpolationOffset                   = -2,
1546 		.maxInterpolationOffset                   = 2,
1547 		.subPixelInterpolationOffsetBits          = 8,
1548 		.maxFramebufferWidth                      = (1 << 14),
1549 		.maxFramebufferHeight                     = (1 << 14),
1550 		.maxFramebufferLayers                     = (1 << 10),
1551 		.framebufferColorSampleCounts             = sample_counts,
1552 		.framebufferDepthSampleCounts             = sample_counts,
1553 		.framebufferStencilSampleCounts           = sample_counts,
1554 		.framebufferNoAttachmentsSampleCounts     = sample_counts,
1555 		.maxColorAttachments                      = MAX_RTS,
1556 		.sampledImageColorSampleCounts            = sample_counts,
1557 		.sampledImageIntegerSampleCounts          = sample_counts,
1558 		.sampledImageDepthSampleCounts            = sample_counts,
1559 		.sampledImageStencilSampleCounts          = sample_counts,
1560 		.storageImageSampleCounts                 = sample_counts,
1561 		.maxSampleMaskWords                       = 1,
1562 		.timestampComputeAndGraphics              = true,
1563 		.timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1564 		.maxClipDistances                         = 8,
1565 		.maxCullDistances                         = 8,
1566 		.maxCombinedClipAndCullDistances          = 8,
1567 		.discreteQueuePriorities                  = 2,
1568 		.pointSizeRange                           = { 0.0, 8191.875 },
1569 		.lineWidthRange                           = { 0.0, 8191.875 },
1570 		.pointSizeGranularity                     = (1.0 / 8.0),
1571 		.lineWidthGranularity                     = (1.0 / 8.0),
1572 		.strictLines                              = false, /* FINISHME */
1573 		.standardSampleLocations                  = true,
1574 		.optimalBufferCopyOffsetAlignment         = 128,
1575 		.optimalBufferCopyRowPitchAlignment       = 128,
1576 		.nonCoherentAtomSize                      = 64,
1577 	};
1578 
1579 	*pProperties = (VkPhysicalDeviceProperties) {
1580 		.apiVersion = radv_physical_device_api_version(pdevice),
1581 		.driverVersion = vk_get_driver_version(),
1582 		.vendorID = ATI_VENDOR_ID,
1583 		.deviceID = pdevice->rad_info.pci_id,
1584 		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1585 		.limits = limits,
1586 		.sparseProperties = {0},
1587 	};
1588 
1589 	strcpy(pProperties->deviceName, pdevice->name);
1590 	memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1591 }
1592 
1593 static void
1594 radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
1595 					VkPhysicalDeviceVulkan11Properties *p)
1596 {
1597 	assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1598 
1599 	memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1600 	memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1601 	memset(p->deviceLUID, 0, VK_LUID_SIZE);
1602 	/* The LUID is for Windows. */
1603 	p->deviceLUIDValid = false;
1604 	p->deviceNodeMask = 0;
1605 
1606 	p->subgroupSize = RADV_SUBGROUP_SIZE;
1607 	p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS |
1608 				     VK_SHADER_STAGE_COMPUTE_BIT;
1609 	p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
1610 					 VK_SUBGROUP_FEATURE_VOTE_BIT |
1611 					 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
1612 					 VK_SUBGROUP_FEATURE_BALLOT_BIT |
1613 					 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
1614 					 VK_SUBGROUP_FEATURE_QUAD_BIT |
1615 					 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
1616 					 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1617 	p->subgroupQuadOperationsInAllStages = true;
1618 
1619 	p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1620 	p->maxMultiviewViewCount = MAX_VIEWS;
1621 	p->maxMultiviewInstanceIndex = INT_MAX;
1622 	p->protectedNoFault = false;
1623 	p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1624 	p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1625 }
1626 
1627 static void
1628 radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
1629 					VkPhysicalDeviceVulkan12Properties *p)
1630 {
1631 	assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
1632 
1633 	p->driverID = VK_DRIVER_ID_MESA_RADV;
1634 	snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1635 	snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1636 		 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
1637 		 radv_get_compiler_string(pdevice));
1638 	p->conformanceVersion = (VkConformanceVersion) {
1639 		.major = 1,
1640 		.minor = 2,
1641 		.subminor = 3,
1642 		.patch = 0,
1643 	};
1644 
1645 	/* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1646 	 * controlled by the same config register.
1647 	 */
1648 	if (pdevice->rad_info.has_packed_math_16bit) {
1649 		p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1650 		p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1651 	} else {
1652 		p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1653 		p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1654 	}
1655 
1656 	/* With LLVM, do not allow both preserving and flushing denorms because
1657 	 * different shaders in the same pipeline can have different settings and
1658 	 * this won't work for merged shaders. To make it work, this requires LLVM
1659 	 * support for changing the register. The same logic applies for the
1660 	 * rounding modes because they are configured with the same config
1661 	 * register.
1662 	 */
1663 	p->shaderDenormFlushToZeroFloat32 = true;
1664 	p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
1665 	p->shaderRoundingModeRTEFloat32 = true;
1666 	p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
1667 	p->shaderSignedZeroInfNanPreserveFloat32 = true;
1668 
1669 	p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1670 	p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1671 	p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
1672 	p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1673 	p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1674 
1675 	p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
1676 	p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1677 	p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
1678 	p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
1679 	p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1680 
1681 	p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1682 	p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1683 	p->shaderSampledImageArrayNonUniformIndexingNative = false;
1684 	p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1685 	p->shaderStorageImageArrayNonUniformIndexingNative = false;
1686 	p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1687 	p->robustBufferAccessUpdateAfterBind = false;
1688 	p->quadDivergentImplicitLod = false;
1689 
1690 	size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1697 	p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1698 	p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1699 	p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1700 	p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1701 	p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1702 	p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1703 	p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1704 	p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1705 	p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1706 	p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1707 	p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1708 	p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1709 	p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1710 	p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1711 	p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1712 
1713 	/* We support all of the depth resolve modes */
1714 	p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1715 					    VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
1716 					    VK_RESOLVE_MODE_MIN_BIT_KHR |
1717 					    VK_RESOLVE_MODE_MAX_BIT_KHR;
1718 
1719 	/* Average doesn't make sense for stencil so we don't support that */
1720 	p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1721 					      VK_RESOLVE_MODE_MIN_BIT_KHR |
1722 					      VK_RESOLVE_MODE_MAX_BIT_KHR;
1723 
1724 	p->independentResolveNone = true;
1725 	p->independentResolve = true;
1726 
1727 	/* GFX6-8 only support single channel min/max filter. */
1728 	p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1729 	p->filterMinmaxSingleComponentFormats = true;
1730 
1731 	p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1732 
1733 	p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1734 }
1735 
1736 void radv_GetPhysicalDeviceProperties2(
1737 	VkPhysicalDevice                            physicalDevice,
1738 	VkPhysicalDeviceProperties2                *pProperties)
1739 {
1740 	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1741 	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
1742 
1743 	VkPhysicalDeviceVulkan11Properties core_1_1 = {
1744 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
1745 	};
1746 	radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
1747 
1748 	VkPhysicalDeviceVulkan12Properties core_1_2 = {
1749 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
1750 	};
1751 	radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
1752 
1753 #define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
1754    memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
1755           sizeof(core_##major##_##minor.core_property))
1756 
1757 #define CORE_PROPERTY(major, minor, property) \
1758    CORE_RENAMED_PROPERTY(major, minor, property, property)
1759 
1760 	vk_foreach_struct(ext, pProperties->pNext) {
1761 		switch (ext->sType) {
1762 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
1763 			VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
1764 				(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
1765 			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1766 			break;
1767 		}
1768 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
1769 			VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
1770 			CORE_PROPERTY(1, 1, deviceUUID);
1771 			CORE_PROPERTY(1, 1, driverUUID);
1772 			CORE_PROPERTY(1, 1, deviceLUID);
1773 			CORE_PROPERTY(1, 1, deviceLUIDValid);
1774 			break;
1775 		}
1776 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
1777 			VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
1778 			CORE_PROPERTY(1, 1, maxMultiviewViewCount);
1779 			CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
1780 			break;
1781 		}
1782 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
1783 			VkPhysicalDevicePointClippingProperties *properties =
1784 			    (VkPhysicalDevicePointClippingProperties*)ext;
1785 			CORE_PROPERTY(1, 1, pointClippingBehavior);
1786 			break;
1787 		}
1788 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
1789 			VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
1790 			    (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
1791 			properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1792 			break;
1793 		}
1794 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
1795 			VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
1796 			    (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
1797 			properties->minImportedHostPointerAlignment = 4096;
1798 			break;
1799 		}
1800 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
1801 			VkPhysicalDeviceSubgroupProperties *properties =
1802 			    (VkPhysicalDeviceSubgroupProperties*)ext;
1803 			CORE_PROPERTY(1, 1, subgroupSize);
1804 			CORE_RENAMED_PROPERTY(1, 1, supportedStages,
1805 						    subgroupSupportedStages);
1806 			CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
1807 						    subgroupSupportedOperations);
1808 			CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
1809 						    subgroupQuadOperationsInAllStages);
1810 			break;
1811 		}
1812 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
1813 			VkPhysicalDeviceMaintenance3Properties *properties =
1814 			    (VkPhysicalDeviceMaintenance3Properties*)ext;
1815 			CORE_PROPERTY(1, 1, maxPerSetDescriptors);
1816 			CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
1817 			break;
1818 		}
1819 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
1820 			VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
1821 				(VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
1822 			CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
1823 			CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
1824 			break;
1825 		}
1826 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
1827 			VkPhysicalDeviceShaderCorePropertiesAMD *properties =
1828 				(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
1829 
1830 			/* Shader engines. */
1831 			properties->shaderEngineCount =
1832 				pdevice->rad_info.max_se;
1833 			properties->shaderArraysPerEngineCount =
1834 				pdevice->rad_info.max_sh_per_se;
1835 			properties->computeUnitsPerShaderArray =
1836 				pdevice->rad_info.min_good_cu_per_sa;
1837 			properties->simdPerComputeUnit =
1838 				pdevice->rad_info.num_simd_per_compute_unit;
1839 			properties->wavefrontsPerSimd =
1840 				pdevice->rad_info.max_wave64_per_simd;
1841 			properties->wavefrontSize = 64;
1842 
1843 			/* SGPR. */
1844 			properties->sgprsPerSimd =
1845 				pdevice->rad_info.num_physical_sgprs_per_simd;
1846 			properties->minSgprAllocation =
1847 				pdevice->rad_info.min_sgpr_alloc;
1848 			properties->maxSgprAllocation =
1849 				pdevice->rad_info.max_sgpr_alloc;
1850 			properties->sgprAllocationGranularity =
1851 				pdevice->rad_info.sgpr_alloc_granularity;
1852 
1853 			/* VGPR. */
1854 			properties->vgprsPerSimd =
1855 				pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
1856 			properties->minVgprAllocation =
1857 				pdevice->rad_info.min_wave64_vgpr_alloc;
1858 			properties->maxVgprAllocation =
1859 				pdevice->rad_info.max_vgpr_alloc;
1860 			properties->vgprAllocationGranularity =
1861 				pdevice->rad_info.wave64_vgpr_alloc_granularity;
1862 			break;
1863 		}
1864 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
1865 			VkPhysicalDeviceShaderCoreProperties2AMD *properties =
1866 				(VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
1867 
1868 			properties->shaderCoreFeatures = 0;
1869 			properties->activeComputeUnitCount =
1870 				pdevice->rad_info.num_good_compute_units;
1871 			break;
1872 		}
1873 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
1874 			VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
1875 				(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
1876 			properties->maxVertexAttribDivisor = UINT32_MAX;
1877 			break;
1878 		}
1879 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
1880 			VkPhysicalDeviceDescriptorIndexingProperties *properties =
1881 				(VkPhysicalDeviceDescriptorIndexingProperties*)ext;
1882 			CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
1883 			CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
1884 			CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
1885 			CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
1886 			CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
1887 			CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
1888 			CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
1889 			CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
1890 			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
1891 			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
1892 			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
1893 			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
1894 			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
1895 			CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
1896 			CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
1897 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
1898 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
1899 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
1900 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
1901 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
1902 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
1903 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
1904 			CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
1905 			break;
1906 		}
1907 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
1908 			VkPhysicalDeviceProtectedMemoryProperties *properties =
1909 				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
1910 			CORE_PROPERTY(1, 1, protectedNoFault);
1911 			break;
1912 		}
1913 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
1914 			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
1915 				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
1916 			properties->primitiveOverestimationSize = 0;
1917 			properties->maxExtraPrimitiveOverestimationSize = 0;
1918 			properties->extraPrimitiveOverestimationSizeGranularity = 0;
1919 			properties->primitiveUnderestimation = false;
1920 			properties->conservativePointAndLineRasterization = false;
1921 			properties->degenerateTrianglesRasterized = false;
1922 			properties->degenerateLinesRasterized = false;
1923 			properties->fullyCoveredFragmentShaderInputVariable = false;
1924 			properties->conservativeRasterizationPostDepthCoverage = false;
1925 			break;
1926 		}
1927 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
1928 			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
1929 				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
1930 			properties->pciDomain = pdevice->bus_info.domain;
1931 			properties->pciBus = pdevice->bus_info.bus;
1932 			properties->pciDevice = pdevice->bus_info.dev;
1933 			properties->pciFunction = pdevice->bus_info.func;
1934 			break;
1935 		}
1936 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
1937 			VkPhysicalDeviceDriverProperties *properties =
1938 				(VkPhysicalDeviceDriverProperties *) ext;
1939 			CORE_PROPERTY(1, 2, driverID);
1940 			CORE_PROPERTY(1, 2, driverName);
1941 			CORE_PROPERTY(1, 2, driverInfo);
1942 			CORE_PROPERTY(1, 2, conformanceVersion);
1943 			break;
1944 		}
1945 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
1946 			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
1947 				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
1948 			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
1949 			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
1950 			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
1951 			properties->maxTransformFeedbackStreamDataSize = 512;
1952 			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
1953 			properties->maxTransformFeedbackBufferDataStride = 512;
1954 			properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
1955 			properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
1956 			properties->transformFeedbackRasterizationStreamSelect = false;
1957 			properties->transformFeedbackDraw = true;
1958 			break;
1959 		}
1960 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
1961 			VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
1962 				(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
1963 
1964 			props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1965 			props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1966 			props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1967 			props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1968 			props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1969 			break;
1970 		}
1971 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
1972 			VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
1973 				(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
1974 			properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
1975 								 VK_SAMPLE_COUNT_4_BIT |
1976 								 VK_SAMPLE_COUNT_8_BIT;
1977 			properties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
1978 			properties->sampleLocationCoordinateRange[0] = 0.0f;
1979 			properties->sampleLocationCoordinateRange[1] = 0.9375f;
1980 			properties->sampleLocationSubPixelBits = 4;
1981 			properties->variableSampleLocations = false;
1982 			break;
1983 		}
1984 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
1985 			VkPhysicalDeviceDepthStencilResolveProperties *properties =
1986 				(VkPhysicalDeviceDepthStencilResolveProperties *)ext;
1987 			CORE_PROPERTY(1, 2, supportedDepthResolveModes);
1988 			CORE_PROPERTY(1, 2, supportedStencilResolveModes);
1989 			CORE_PROPERTY(1, 2, independentResolveNone);
1990 			CORE_PROPERTY(1, 2, independentResolve);
1991 			break;
1992 		}
1993 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
1994 			VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
1995 				(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
1996 			properties->storageTexelBufferOffsetAlignmentBytes = 4;
1997 			properties->storageTexelBufferOffsetSingleTexelAlignment = true;
1998 			properties->uniformTexelBufferOffsetAlignmentBytes = 4;
1999 			properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
2000 			break;
2001 		}
2002 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES: {
2003 			VkPhysicalDeviceFloatControlsProperties *properties =
2004 				(VkPhysicalDeviceFloatControlsProperties *)ext;
2005 			CORE_PROPERTY(1, 2, denormBehaviorIndependence);
2006 			CORE_PROPERTY(1, 2, roundingModeIndependence);
2007 			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
2008 			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
2009 			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
2010 			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
2011 			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
2012 			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
2013 			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
2014 			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
2015 			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
2016 			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
2017 			CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
2018 			CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
2019 			CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
2020 			CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
2021 			CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
2022 			break;
2023 		}
2024 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
2025 			VkPhysicalDeviceTimelineSemaphoreProperties *properties =
2026 				(VkPhysicalDeviceTimelineSemaphoreProperties *) ext;
2027 			CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
2028 			break;
2029 		}
2030 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
2031 			VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
2032 				(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
2033 			props->minSubgroupSize = 64;
2034 			props->maxSubgroupSize = 64;
2035 			props->maxComputeWorkgroupSubgroups = UINT32_MAX;
2036 			props->requiredSubgroupSizeStages = 0;
2037 
2038 			if (pdevice->rad_info.chip_class >= GFX10) {
2039 				/* Only GFX10+ supports wave32. */
2040 				props->minSubgroupSize = 32;
2041 				props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
2042 			}
2043 			break;
2044 		}
2045 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
2046 			radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
2047 			break;
2048 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
2049 			radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
2050 			break;
2051 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2052 			VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2053 				(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2054 			props->lineSubPixelPrecisionBits = 4;
2055 			break;
2056 		}
2057 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2058 			VkPhysicalDeviceRobustness2PropertiesEXT *properties =
2059 				(VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
2060 			properties->robustStorageBufferAccessSizeAlignment = 4;
2061 			properties->robustUniformBufferAccessSizeAlignment = 4;
2062 			break;
2063 		}
2064 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2065 			VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
2066 				(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2067 			props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
2068 			break;
2069 		}
2070 		default:
2071 			break;
2072 		}
2073 	}
2074 }
2075 
2076 static void radv_get_physical_device_queue_family_properties(
2077 	struct radv_physical_device*                pdevice,
2078 	uint32_t*                                   pCount,
2079 	VkQueueFamilyProperties**                   pQueueFamilyProperties)
2080 {
2081 	int num_queue_families = 1;
2082 	int idx;
2083 	if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
2084 	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
2085 		num_queue_families++;
2086 
2087 	if (pQueueFamilyProperties == NULL) {
2088 		*pCount = num_queue_families;
2089 		return;
2090 	}
2091 
2092 	if (!*pCount)
2093 		return;
2094 
2095 	idx = 0;
2096 	if (*pCount >= 1) {
2097 		*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
2098 			.queueFlags = VK_QUEUE_GRAPHICS_BIT |
2099 			              VK_QUEUE_COMPUTE_BIT |
2100 			              VK_QUEUE_TRANSFER_BIT |
2101 			              VK_QUEUE_SPARSE_BINDING_BIT,
2102 			.queueCount = 1,
2103 			.timestampValidBits = 64,
2104 			.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
2105 		};
2106 		idx++;
2107 	}
2108 
2109 	if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
2110 	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2111 		if (*pCount > idx) {
2112 			*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
2113 				.queueFlags = VK_QUEUE_COMPUTE_BIT |
2114 				              VK_QUEUE_TRANSFER_BIT |
2115 				              VK_QUEUE_SPARSE_BINDING_BIT,
2116 				.queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
2117 				.timestampValidBits = 64,
2118 				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
2119 			};
2120 			idx++;
2121 		}
2122 	}
2123 	*pCount = idx;
2124 }
2125 
2126 void radv_GetPhysicalDeviceQueueFamilyProperties(
2127 	VkPhysicalDevice                            physicalDevice,
2128 	uint32_t*                                   pCount,
2129 	VkQueueFamilyProperties*                    pQueueFamilyProperties)
2130 {
2131 	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2132 	if (!pQueueFamilyProperties) {
2133 		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2134 		return;
2135 	}
2136 	VkQueueFamilyProperties *properties[] = {
2137 		pQueueFamilyProperties + 0,
2138 		pQueueFamilyProperties + 1,
2139 		pQueueFamilyProperties + 2,
2140 	};
2141 	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2142 	assert(*pCount <= 3);
2143 }
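/* Caller-side sketch (application code, not part of this driver): the
 * function above implements the standard Vulkan two-call enumeration
 * idiom:
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, NULL);
 *    assert(count <= 3);
 *    VkQueueFamilyProperties props[3];
 *    vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, props);
 */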
2144 
2145 void radv_GetPhysicalDeviceQueueFamilyProperties2(
2146 	VkPhysicalDevice                            physicalDevice,
2147 	uint32_t*                                   pCount,
2148 	VkQueueFamilyProperties2                   *pQueueFamilyProperties)
2149 {
2150 	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2151 	if (!pQueueFamilyProperties) {
2152 		radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2153 		return;
2154 	}
2155 	VkQueueFamilyProperties *properties[] = {
2156 		&pQueueFamilyProperties[0].queueFamilyProperties,
2157 		&pQueueFamilyProperties[1].queueFamilyProperties,
2158 		&pQueueFamilyProperties[2].queueFamilyProperties,
2159 	};
2160 	radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2161 	assert(*pCount <= 3);
2162 }
2163 
2164 void radv_GetPhysicalDeviceMemoryProperties(
2165 	VkPhysicalDevice                            physicalDevice,
2166 	VkPhysicalDeviceMemoryProperties           *pMemoryProperties)
2167 {
2168 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2169 
2170 	*pMemoryProperties = physical_device->memory_properties;
2171 }
2172 
2173 static void
2174 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2175 				  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2176 {
2177 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2178 	VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
2179 	uint64_t visible_vram_size = radv_get_visible_vram_size(device);
2180 	uint64_t vram_size = radv_get_vram_size(device);
2181 	uint64_t gtt_size = device->rad_info.gart_size;
2182 	uint64_t heap_budget, heap_usage;
2183 
2184 	/* For all memory heaps, the budget is computed as follows:
2185 	 *	heap_budget = heap_size - global_heap_usage + app_heap_usage
2186 	 *
2187 	 * The Vulkan spec 1.1.97 says that the budget should include any
2188 	 * currently allocated device memory.
2189 	 *
2190 	 * Note that the application heap usages are not really accurate (e.g.
2191 	 * in the presence of shared buffers).
2192 	 */
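	/* Worked example with made-up numbers: an 8 GiB VRAM heap of which
	 * 3 GiB is allocated system-wide, 1 GiB of that by this process, gives
	 *	heap_budget = 8 - 3 + 1 = 6 GiB
	 * i.e. the process can allocate about 5 GiB on top of the 1 GiB it
	 * already uses.
	 */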
2193 	for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
2194 		uint32_t heap_index = device->memory_properties.memoryTypes[i].heapIndex;
2195 
2196 		if ((device->memory_domains[i] & RADEON_DOMAIN_VRAM) && (device->memory_flags[i] & RADEON_FLAG_NO_CPU_ACCESS)) {
2197 			heap_usage = device->ws->query_value(device->ws,
2198 							     RADEON_ALLOCATED_VRAM);
2199 
2200 			heap_budget = vram_size -
2201 				device->ws->query_value(device->ws, RADEON_VRAM_USAGE) +
2202 				heap_usage;
2203 
2204 			memoryBudget->heapBudget[heap_index] = heap_budget;
2205 			memoryBudget->heapUsage[heap_index] = heap_usage;
2206 		} else if (device->memory_domains[i] & RADEON_DOMAIN_VRAM) {
2207 			heap_usage = device->ws->query_value(device->ws,
2208 							     RADEON_ALLOCATED_VRAM_VIS);
2209 
2210 			heap_budget = visible_vram_size -
2211 				device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
2212 				heap_usage;
2213 
2214 			memoryBudget->heapBudget[heap_index] = heap_budget;
2215 			memoryBudget->heapUsage[heap_index] = heap_usage;
2216 		} else {
2217 			assert(device->memory_domains[i] & RADEON_DOMAIN_GTT);
2218 
2219 			heap_usage = device->ws->query_value(device->ws,
2220 							     RADEON_ALLOCATED_GTT);
2221 
2222 			heap_budget = gtt_size -
2223 				device->ws->query_value(device->ws, RADEON_GTT_USAGE) +
2224 				heap_usage;
2225 
2226 			memoryBudget->heapBudget[heap_index] = heap_budget;
2227 			memoryBudget->heapUsage[heap_index] = heap_usage;
2228 		}
2229 	}
2230 
2231 	/* The heapBudget and heapUsage values must be zero for array elements
2232 	 * greater than or equal to
2233 	 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2234 	 */
2235 	for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2236 		memoryBudget->heapBudget[i] = 0;
2237 		memoryBudget->heapUsage[i] = 0;
2238 	}
2239 }
2240 
2241 void radv_GetPhysicalDeviceMemoryProperties2(
2242 	VkPhysicalDevice                            physicalDevice,
2243 	VkPhysicalDeviceMemoryProperties2          *pMemoryProperties)
2244 {
2245 	radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2246 					       &pMemoryProperties->memoryProperties);
2247 
2248 	VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2249 		vk_find_struct(pMemoryProperties->pNext,
2250 			       PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2251 	if (memory_budget)
2252 		radv_get_memory_budget_properties(physicalDevice, memory_budget);
2253 }
2254 
2255 VkResult radv_GetMemoryHostPointerPropertiesEXT(
2256 	VkDevice                                    _device,
2257 	VkExternalMemoryHandleTypeFlagBits          handleType,
2258 	const void                                 *pHostPointer,
2259 	VkMemoryHostPointerPropertiesEXT           *pMemoryHostPointerProperties)
2260 {
2261 	RADV_FROM_HANDLE(radv_device, device, _device);
2262 
2263 	switch (handleType)
2264 	{
2265 	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2266 		const struct radv_physical_device *physical_device = device->physical_device;
2267 		uint32_t memoryTypeBits = 0;
2268 		for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2269 			if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2270 			    !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2271 				memoryTypeBits = (1 << i);
2272 				break;
2273 			}
2274 		}
2275 		pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2276 		return VK_SUCCESS;
2277 	}
2278 	default:
2279 		return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2280 	}
2281 }
2282 
2283 static enum radeon_ctx_priority
2284 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2285 {
2286 	/* Default to MEDIUM when a specific global priority isn't requested */
2287 	if (!pObj)
2288 		return RADEON_CTX_PRIORITY_MEDIUM;
2289 
2290 	switch(pObj->globalPriority) {
2291 	case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2292 		return RADEON_CTX_PRIORITY_REALTIME;
2293 	case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2294 		return RADEON_CTX_PRIORITY_HIGH;
2295 	case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2296 		return RADEON_CTX_PRIORITY_MEDIUM;
2297 	case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2298 		return RADEON_CTX_PRIORITY_LOW;
2299 	default:
2300 		unreachable("Illegal global priority value");
2301 		return RADEON_CTX_PRIORITY_INVALID;
2302 	}
2303 }
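/* Application-side sketch (assumes VK_EXT_global_priority is enabled): the
 * pObj handled above arrives through a queue-create pNext chain such as:
 *
 *    VkDeviceQueueGlobalPriorityCreateInfoEXT prio = {
 *        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
 *        .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
 *    };
 *    VkDeviceQueueCreateInfo queue_info = {
 *        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
 *        .pNext = &prio,
 *        ...
 *    };
 */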
2304 
2305 static int
2306 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
2307 		uint32_t queue_family_index, int idx,
2308 		VkDeviceQueueCreateFlags flags,
2309 		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
2310 {
2311 	queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
2312 	queue->device = device;
2313 	queue->queue_family_index = queue_family_index;
2314 	queue->queue_idx = idx;
2315 	queue->priority = radv_get_queue_global_priority(global_priority);
2316 	queue->flags = flags;
2317 	queue->hw_ctx = NULL;
2318 
2319 	VkResult result = device->ws->ctx_create(device->ws, queue->priority, &queue->hw_ctx);
2320 	if (result != VK_SUCCESS)
2321 		return vk_error(device->instance, result);
2322 
2323 	list_inithead(&queue->pending_submissions);
2324 	pthread_mutex_init(&queue->pending_mutex, NULL);
2325 
2326 	pthread_mutex_init(&queue->thread_mutex, NULL);
2327 	queue->thread_submission = NULL;
2328 	queue->thread_running = queue->thread_exit = false;
2329 	result = radv_create_pthread_cond(&queue->thread_cond);
2330 	if (result != VK_SUCCESS)
2331 		return vk_error(device->instance, result);
2332 
2333 	return VK_SUCCESS;
2334 }
2335 
2336 static void
2337 radv_queue_finish(struct radv_queue *queue)
2338 {
2339 	if (queue->thread_running) {
2340 		p_atomic_set(&queue->thread_exit, true);
2341 		pthread_cond_broadcast(&queue->thread_cond);
2342 		pthread_join(queue->submission_thread, NULL);
2343 	}
2344 	pthread_cond_destroy(&queue->thread_cond);
2345 	pthread_mutex_destroy(&queue->pending_mutex);
2346 	pthread_mutex_destroy(&queue->thread_mutex);
2347 
2348 	if (queue->hw_ctx)
2349 		queue->device->ws->ctx_destroy(queue->hw_ctx);
2350 
2351 	if (queue->initial_full_flush_preamble_cs)
2352 		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2353 	if (queue->initial_preamble_cs)
2354 		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2355 	if (queue->continue_preamble_cs)
2356 		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2357 	if (queue->descriptor_bo)
2358 		queue->device->ws->buffer_destroy(queue->descriptor_bo);
2359 	if (queue->scratch_bo)
2360 		queue->device->ws->buffer_destroy(queue->scratch_bo);
2361 	if (queue->esgs_ring_bo)
2362 		queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
2363 	if (queue->gsvs_ring_bo)
2364 		queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
2365 	if (queue->tess_rings_bo)
2366 		queue->device->ws->buffer_destroy(queue->tess_rings_bo);
2367 	if (queue->gds_bo)
2368 		queue->device->ws->buffer_destroy(queue->gds_bo);
2369 	if (queue->gds_oa_bo)
2370 		queue->device->ws->buffer_destroy(queue->gds_oa_bo);
2371 	if (queue->compute_scratch_bo)
2372 		queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
2373 }
2374 
2375 static void
2376 radv_bo_list_init(struct radv_bo_list *bo_list)
2377 {
2378 	pthread_rwlock_init(&bo_list->rwlock, NULL);
2379 	bo_list->list.count = bo_list->capacity = 0;
2380 	bo_list->list.bos = NULL;
2381 }
2382 
2383 static void
2384 radv_bo_list_finish(struct radv_bo_list *bo_list)
2385 {
2386 	free(bo_list->list.bos);
2387 	pthread_rwlock_destroy(&bo_list->rwlock);
2388 }
2389 
2390 VkResult radv_bo_list_add(struct radv_device *device,
2391 			  struct radeon_winsys_bo *bo)
2392 {
2393 	struct radv_bo_list *bo_list = &device->bo_list;
2394 
2395 	if (bo->is_local)
2396 		return VK_SUCCESS;
2397 
2398 	if (unlikely(!device->use_global_bo_list))
2399 		return VK_SUCCESS;
2400 
2401 	pthread_rwlock_wrlock(&bo_list->rwlock);
2402 	if (bo_list->list.count == bo_list->capacity) {
2403 		unsigned capacity = MAX2(4, bo_list->capacity * 2);
2404 		void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo*));
2405 
2406 		if (!data) {
2407 			pthread_rwlock_unlock(&bo_list->rwlock);
2408 			return VK_ERROR_OUT_OF_HOST_MEMORY;
2409 		}
2410 
2411 		bo_list->list.bos = (struct radeon_winsys_bo**)data;
2412 		bo_list->capacity = capacity;
2413 	}
2414 
2415 	bo_list->list.bos[bo_list->list.count++] = bo;
2416 	pthread_rwlock_unlock(&bo_list->rwlock);
2417 	return VK_SUCCESS;
2418 }
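/* Growth sketch: the capacity sequence is 0 -> 4 -> 8 -> 16 -> ..., so a
 * long series of radv_bo_list_add() calls is amortized O(1) per BO despite
 * the realloc.
 */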
2419 
2420 void radv_bo_list_remove(struct radv_device *device,
2421 			 struct radeon_winsys_bo *bo)
2422 {
2423 	struct radv_bo_list *bo_list = &device->bo_list;
2424 
2425 	if (bo->is_local)
2426 		return;
2427 
2428 	if (unlikely(!device->use_global_bo_list))
2429 		return;
2430 
2431 	pthread_rwlock_wrlock(&bo_list->rwlock);
2432 	/* Loop over the list backwards so that we find the most recently
2433 	 * added memory first. */
2434 	for(unsigned i = bo_list->list.count; i-- > 0;) {
2435 		if (bo_list->list.bos[i] == bo) {
2436 			bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
2437 			--bo_list->list.count;
2438 			break;
2439 		}
2440 	}
2441 	pthread_rwlock_unlock(&bo_list->rwlock);
2442 }
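/* Removal sketch: deleting B from [A, B, C, D] copies the tail element
 * over the match and shrinks the count, leaving [A, D, C] - O(1) deletion
 * at the cost of element order, which this unordered list doesn't need.
 */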
2443 
2444 static void
2445 radv_device_init_gs_info(struct radv_device *device)
2446 {
2447 	device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
2448 						       device->physical_device->rad_info.family);
2449 }
2450 
2451 static int radv_get_device_extension_index(const char *name)
2452 {
2453 	for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
2454 		if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
2455 			return i;
2456 	}
2457 	return -1;
2458 }
2459 
2460 static int
2461 radv_get_int_debug_option(const char *name, int default_value)
2462 {
2463 	const char *str;
2464 	int result;
2465 
2466 	str = getenv(name);
2467 	if (!str) {
2468 		result = default_value;
2469 	} else {
2470 		char *endptr;
2471 
2472 		result = strtol(str, &endptr, 0);
2473 		if (str == endptr) {
2474 			/* No digits found. */
2475 			result = default_value;
2476 		}
2477 	}
2478 
2479 	return result;
2480 }
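/* Sketch: RADV_THREAD_TRACE=4096 returns 4096 and, since strtol() is
 * called with base 0, hex such as RADV_THREAD_TRACE=0x1000 also parses;
 * an unset or non-numeric value returns default_value.
 */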
2481 
2482 static void
2483 radv_device_init_dispatch(struct radv_device *device)
2484 {
2485 	const struct radv_instance *instance = device->physical_device->instance;
2486 	const struct radv_device_dispatch_table *dispatch_table_layer = NULL;
2487 	bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;
2488 	int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
2489 
2490 	if (radv_thread_trace >= 0) {
2491 		/* Use device entrypoints from the SQTT layer if enabled. */
2492 		dispatch_table_layer = &sqtt_device_dispatch_table;
2493 	}
2494 
2495 	for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) {
2496 		/* Vulkan requires that entrypoints for extensions which have not been
2497 		 * enabled must not be advertised.
2498 		 */
2499 		if (!unchecked &&
2500 		    !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
2501 						       &instance->enabled_extensions,
2502 						       &device->enabled_extensions)) {
2503 			device->dispatch.entrypoints[i] = NULL;
2504 		} else if (dispatch_table_layer &&
2505 			   dispatch_table_layer->entrypoints[i]) {
2506 			device->dispatch.entrypoints[i] =
2507 				dispatch_table_layer->entrypoints[i];
2508 		} else {
2509 			device->dispatch.entrypoints[i] =
2510 				radv_device_dispatch_table.entrypoints[i];
2511 		}
2512 	}
2513 }
2514 
2515 static VkResult
2516 radv_create_pthread_cond(pthread_cond_t *cond)
2517 {
2518 	pthread_condattr_t condattr;
2519 	if (pthread_condattr_init(&condattr)) {
2520 		return VK_ERROR_INITIALIZATION_FAILED;
2521 	}
2522 
2523 	if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) {
2524 		pthread_condattr_destroy(&condattr);
2525 		return VK_ERROR_INITIALIZATION_FAILED;
2526 	}
2527 	if (pthread_cond_init(cond, &condattr)) {
2528 		pthread_condattr_destroy(&condattr);
2529 		return VK_ERROR_INITIALIZATION_FAILED;
2530 	}
2531 	pthread_condattr_destroy(&condattr);
2532 	return VK_SUCCESS;
2533 }
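/* Usage sketch (illustrative; assumes a mutex `mtx` and predicate `done`):
 * because the condattr clock is CLOCK_MONOTONIC, deadlines passed to
 * pthread_cond_timedwait() must be measured on that clock too:
 *
 *    struct timespec abstime;
 *    clock_gettime(CLOCK_MONOTONIC, &abstime);
 *    abstime.tv_sec += 1;                       // wait at most ~1 second
 *    pthread_mutex_lock(&mtx);
 *    while (!done &&
 *           pthread_cond_timedwait(&cond, &mtx, &abstime) != ETIMEDOUT)
 *            ;
 *    pthread_mutex_unlock(&mtx);
 */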
2534 
2535 static VkResult
2536 check_physical_device_features(VkPhysicalDevice physicalDevice,
2537 			       const VkPhysicalDeviceFeatures *features)
2538 {
2539 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2540 	VkPhysicalDeviceFeatures supported_features;
2541 	radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
2542 	VkBool32 *supported_feature = (VkBool32 *)&supported_features;
2543 	VkBool32 *enabled_feature = (VkBool32 *)features;
2544 	unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
2545 	for (uint32_t i = 0; i < num_features; i++) {
2546 		if (enabled_feature[i] && !supported_feature[i])
2547 			return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
2548 	}
2549 
2550 	return VK_SUCCESS;
2551 }
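/* This scan relies on VkPhysicalDeviceFeatures being, per the Vulkan spec,
 * a struct composed solely of VkBool32 members, so it can be walked as a
 * flat array. Example: enabling .geometryShader = VK_TRUE on a device that
 * doesn't support it makes vkCreateDevice fail with
 * VK_ERROR_FEATURE_NOT_PRESENT here.
 */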
2552 
2553 static VkResult radv_device_init_border_color(struct radv_device *device)
2554 {
2555 	device->border_color_data.bo =
2556 		device->ws->buffer_create(device->ws,
2557 					RADV_BORDER_COLOR_BUFFER_SIZE,
2558 					4096,
2559 					RADEON_DOMAIN_VRAM,
2560 					RADEON_FLAG_CPU_ACCESS |
2561 					RADEON_FLAG_READ_ONLY |
2562 					RADEON_FLAG_NO_INTERPROCESS_SHARING,
2563 					RADV_BO_PRIORITY_SHADER);
2564 
2565 	if (device->border_color_data.bo == NULL)
2566 		return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2567 
2568 	device->border_color_data.colors_gpu_ptr =
2569 		device->ws->buffer_map(device->border_color_data.bo);
2570 	if (!device->border_color_data.colors_gpu_ptr)
2571 		return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2572 	pthread_mutex_init(&device->border_color_data.mutex, NULL);
2573 
2574 	return VK_SUCCESS;
2575 }
2576 
2577 static void radv_device_finish_border_color(struct radv_device *device)
2578 {
2579 	if (device->border_color_data.bo) {
2580 		device->ws->buffer_destroy(device->border_color_data.bo);
2581 
2582 		pthread_mutex_destroy(&device->border_color_data.mutex);
2583 	}
2584 }
2585 
2586 VkResult radv_CreateDevice(
2587 	VkPhysicalDevice                            physicalDevice,
2588 	const VkDeviceCreateInfo*                   pCreateInfo,
2589 	const VkAllocationCallbacks*                pAllocator,
2590 	VkDevice*                                   pDevice)
2591 {
2592 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2593 	VkResult result;
2594 	struct radv_device *device;
2595 
2596 	bool keep_shader_info = false;
2597 	bool robust_buffer_access = false;
2598 	bool overallocation_disallowed = false;
2599 	bool custom_border_colors = false;
2600 
2601 	/* Check enabled features */
2602 	if (pCreateInfo->pEnabledFeatures) {
2603 		result = check_physical_device_features(physicalDevice,
2604 							pCreateInfo->pEnabledFeatures);
2605 		if (result != VK_SUCCESS)
2606 			return result;
2607 
2608 		if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
2609 			robust_buffer_access = true;
2610 	}
2611 
2612 	vk_foreach_struct_const(ext, pCreateInfo->pNext) {
2613 		switch (ext->sType) {
2614 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
2615 			const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
2616 			result = check_physical_device_features(physicalDevice,
2617 								&features->features);
2618 			if (result != VK_SUCCESS)
2619 				return result;
2620 
2621 			if (features->features.robustBufferAccess)
2622 				robust_buffer_access = true;
2623 			break;
2624 		}
2625 		case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
2626 			const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
2627 			if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
2628 				overallocation_disallowed = true;
2629 			break;
2630 		}
2631 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2632 			const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
2633 			custom_border_colors = border_color_features->customBorderColors;
2634 			break;
2635 		}
2636 		default:
2637 			break;
2638 		}
2639 	}
2640 
2641 	device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
2642 			    sizeof(*device), 8,
2643 			    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2644 	if (!device)
2645 		return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2646 
2647 	vk_device_init(&device->vk, pCreateInfo,
2648 		       &physical_device->instance->alloc, pAllocator);
2649 
2650 	device->instance = physical_device->instance;
2651 	device->physical_device = physical_device;
2652 
2653 	device->ws = physical_device->ws;
2654 
2655 	for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
2656 		const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
2657 		int index = radv_get_device_extension_index(ext_name);
2658 		if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
2659 			vk_free(&device->vk.alloc, device);
2660 			return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
2661 		}
2662 
2663 		device->enabled_extensions.extensions[index] = true;
2664 	}
2665 
2666 	radv_device_init_dispatch(device);
2667 
2668 	keep_shader_info = device->enabled_extensions.AMD_shader_info;
2669 
2670 	/* With update after bind we can't attach BOs to the command buffer
2671 	 * from the descriptor set anymore, so we have to use a global BO list.
2672 	 */
2673 	device->use_global_bo_list =
2674 		(device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
2675 		device->enabled_extensions.EXT_descriptor_indexing ||
2676 		device->enabled_extensions.EXT_buffer_device_address ||
2677 		device->enabled_extensions.KHR_buffer_device_address;
2678 
2679 	device->robust_buffer_access = robust_buffer_access;
2680 
2681 	mtx_init(&device->shader_slab_mutex, mtx_plain);
2682 	list_inithead(&device->shader_slabs);
2683 
2684 	device->overallocation_disallowed = overallocation_disallowed;
2685 	mtx_init(&device->overallocation_mutex, mtx_plain);
2686 
2687 	radv_bo_list_init(&device->bo_list);
2688 
2689 	for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2690 		const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
2691 		uint32_t qfi = queue_create->queueFamilyIndex;
2692 		const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
2693 			vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
2694 
2695 		assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
2696 
2697 		device->queues[qfi] = vk_alloc(&device->vk.alloc,
2698 					       queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2699 		if (!device->queues[qfi]) {
2700 			result = VK_ERROR_OUT_OF_HOST_MEMORY;
2701 			goto fail;
2702 		}
2703 
2704 		memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
2705 
2706 		device->queue_count[qfi] = queue_create->queueCount;
2707 
2708 		for (unsigned q = 0; q < queue_create->queueCount; q++) {
2709 			result = radv_queue_init(device, &device->queues[qfi][q],
2710 						 qfi, q, queue_create->flags,
2711 						 global_priority);
2712 			if (result != VK_SUCCESS)
2713 				goto fail;
2714 		}
2715 	}
2716 
2717 	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
2718 			      !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
2719 
2720 	/* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
2721 	device->dfsm_allowed = device->pbb_allowed &&
2722 	                       (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
2723 
2724 	device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
2725 
2726 	/* The maximum number of scratch waves. Scratch space isn't divided
2727 	 * evenly between CUs. The number is only a function of the number of CUs.
2728 	 * We can decrease the constant to decrease the scratch buffer size.
2729 	 *
2730 	 * sctx->scratch_waves must be >= the maximum possible size of
2731 	 * 1 threadgroup, so that the hw doesn't hang from being unable
2732 	 * to start any.
2733 	 *
2734 	 * The recommended value is 4 per CU at most. Higher numbers don't
2735 	 * bring much benefit, but they still occupy chip resources (think
2736 	 * async compute). I've seen ~2% performance difference between 4 and 32.
2737 	 */
2738 	uint32_t max_threads_per_block = 2048;
2739 	device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
2740 				     max_threads_per_block / 64);
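	/* Worked example (hypothetical part): with 40 compute units this gives
	 * scratch_waves = MAX2(32 * 40, 2048 / 64) = MAX2(1280, 32) = 1280,
	 * so the per-CU term dominates on real hardware and the
	 * max_threads_per_block term only acts as a lower bound.
	 */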
2741 
2742 	device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
2743 
2744 	if (device->physical_device->rad_info.chip_class >= GFX7) {
2745 		/* If the KMD allows it (there is a KMD hw register for it),
2746 		 * allow launching waves out-of-order.
2747 		 */
2748 		device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
2749 	}
2750 
2751 	radv_device_init_gs_info(device);
2752 
2753 	device->tess_offchip_block_dw_size =
2754 		device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
2755 
2756 	if (getenv("RADV_TRACE_FILE")) {
2757 		const char *filename = getenv("RADV_TRACE_FILE");
2758 
2759 		keep_shader_info = true;
2760 
2761 		if (!radv_init_trace(device)) {
2762 			result = VK_ERROR_INITIALIZATION_FAILED;
			goto fail;
		}
2763 
2764 		fprintf(stderr, "*****************************************************************************\n");
2765 		fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
2766 		fprintf(stderr, "*****************************************************************************\n");
2767 
2768 		fprintf(stderr, "Trace file will be dumped to %s\n", filename);
2769 		radv_dump_enabled_options(device, stderr);
2770 	}
2771 
2772 	int radv_thread_trace = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
2773 	if (radv_thread_trace >= 0) {
2774 		fprintf(stderr, "*************************************************\n");
2775 		fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
2776 		fprintf(stderr, "*************************************************\n");
2777 
2778 		if (device->physical_device->rad_info.chip_class < GFX8) {
2779 			fprintf(stderr, "GPU hardware not supported: refer to "
2780 					"the RGP documentation for the list of "
2781 					"supported GPUs!\n");
2782 			abort();
2783 		}
2784 
2785 		/* Default buffer size set to 1MB per SE. */
2786 		device->thread_trace_buffer_size =
2787 			radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
2788 		device->thread_trace_start_frame = radv_thread_trace;
2789 
2790 		if (!radv_thread_trace_init(device)) {
2791 			result = VK_ERROR_INITIALIZATION_FAILED;
			goto fail;
		}
2792 	}
2793 
2794 	device->keep_shader_info = keep_shader_info;
2795 	result = radv_device_init_meta(device);
2796 	if (result != VK_SUCCESS)
2797 		goto fail;
2798 
2799 	radv_device_init_msaa(device);
2800 
2801 	/* If the custom border color extension is enabled, create the buffer we need. */
2802 	if (custom_border_colors) {
2803 		result = radv_device_init_border_color(device);
2804 		if (result != VK_SUCCESS)
2805 			goto fail;
2806 	}
2807 
2808 	for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
2809 		device->empty_cs[family] = device->ws->cs_create(device->ws, family);
2810 		if (!device->empty_cs[family]) {
2811 			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}
2812 
2813 		switch (family) {
2814 		case RADV_QUEUE_GENERAL:
2815 			radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
2816 			radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
2817 			radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
2818 			break;
2819 		case RADV_QUEUE_COMPUTE:
2820 			radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
2821 			radeon_emit(device->empty_cs[family], 0);
2822 			break;
2823 		}
2824 
2825 		result = device->ws->cs_finalize(device->empty_cs[family]);
2826 		if (result != VK_SUCCESS)
2827 			goto fail;
2828 	}
2829 
2830 	if (device->physical_device->rad_info.chip_class >= GFX7)
2831 		cik_create_gfx_config(device);
2832 
2833 	VkPipelineCacheCreateInfo ci;
2834 	ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
2835 	ci.pNext = NULL;
2836 	ci.flags = 0;
2837 	ci.pInitialData = NULL;
2838 	ci.initialDataSize = 0;
2839 	VkPipelineCache pc;
2840 	result = radv_CreatePipelineCache(radv_device_to_handle(device),
2841 					  &ci, NULL, &pc);
2842 	if (result != VK_SUCCESS)
2843 		goto fail_meta;
2844 
2845 	device->mem_cache = radv_pipeline_cache_from_handle(pc);
2846 
2847 	result = radv_create_pthread_cond(&device->timeline_cond);
2848 	if (result != VK_SUCCESS)
2849 		goto fail_mem_cache;
2850 
2851 	device->force_aniso =
2852 		MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
2853 	if (device->force_aniso >= 0) {
2854 		fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
2855 			1 << util_logbase2(device->force_aniso));
2856 	}
2857 
2858 	*pDevice = radv_device_to_handle(device);
2859 	return VK_SUCCESS;
2860 
2861 fail_mem_cache:
2862 	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
2863 fail_meta:
2864 	radv_device_finish_meta(device);
2865 fail:
2866 	radv_bo_list_finish(&device->bo_list);
2867 
2868 	radv_thread_trace_finish(device);
2869 
2870 	if (device->trace_bo)
2871 		device->ws->buffer_destroy(device->trace_bo);
2872 
2873 	if (device->gfx_init)
2874 		device->ws->buffer_destroy(device->gfx_init);
2875 
2876 	radv_device_finish_border_color(device);
2877 
2878 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2879 		for (unsigned q = 0; q < device->queue_count[i]; q++)
2880 			radv_queue_finish(&device->queues[i][q]);
2881 		if (device->queue_count[i])
2882 			vk_free(&device->vk.alloc, device->queues[i]);
2883 	}
2884 
2885 	vk_free(&device->vk.alloc, device);
2886 	return result;
2887 }
2888 
2889 void radv_DestroyDevice(
2890 	VkDevice                                    _device,
2891 	const VkAllocationCallbacks*                pAllocator)
2892 {
2893 	RADV_FROM_HANDLE(radv_device, device, _device);
2894 
2895 	if (!device)
2896 		return;
2897 
2898 	if (device->trace_bo)
2899 		device->ws->buffer_destroy(device->trace_bo);
2900 
2901 	if (device->gfx_init)
2902 		device->ws->buffer_destroy(device->gfx_init);
2903 
2904 	radv_device_finish_border_color(device);
2905 
2906 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2907 		for (unsigned q = 0; q < device->queue_count[i]; q++)
2908 			radv_queue_finish(&device->queues[i][q]);
2909 		if (device->queue_count[i])
2910 			vk_free(&device->vk.alloc, device->queues[i]);
2911 		if (device->empty_cs[i])
2912 			device->ws->cs_destroy(device->empty_cs[i]);
2913 	}
2914 	radv_device_finish_meta(device);
2915 
2916 	VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
2917 	radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
2918 
2919 	radv_destroy_shader_slabs(device);
2920 
2921 	pthread_cond_destroy(&device->timeline_cond);
2922 	radv_bo_list_finish(&device->bo_list);
2923 
2924 	radv_thread_trace_finish(device);
2925 
2926 	vk_free(&device->vk.alloc, device);
2927 }
2928 
2929 VkResult radv_EnumerateInstanceLayerProperties(
2930 	uint32_t*                                   pPropertyCount,
2931 	VkLayerProperties*                          pProperties)
2932 {
2933 	if (pProperties == NULL) {
2934 		*pPropertyCount = 0;
2935 		return VK_SUCCESS;
2936 	}
2937 
2938 	/* None supported at this time */
2939 	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2940 }
2941 
2942 VkResult radv_EnumerateDeviceLayerProperties(
2943 	VkPhysicalDevice                            physicalDevice,
2944 	uint32_t*                                   pPropertyCount,
2945 	VkLayerProperties*                          pProperties)
2946 {
2947 	if (pProperties == NULL) {
2948 		*pPropertyCount = 0;
2949 		return VK_SUCCESS;
2950 	}
2951 
2952 	/* None supported at this time */
2953 	return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2954 }
2955 
2956 void radv_GetDeviceQueue2(
2957 	VkDevice                                    _device,
2958 	const VkDeviceQueueInfo2*                   pQueueInfo,
2959 	VkQueue*                                    pQueue)
2960 {
2961 	RADV_FROM_HANDLE(radv_device, device, _device);
2962 	struct radv_queue *queue;
2963 
2964 	queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
2965 	if (pQueueInfo->flags != queue->flags) {
2966 		/* From the Vulkan 1.1.70 spec:
2967 		 *
2968 		 * "The queue returned by vkGetDeviceQueue2 must have the same
2969 		 * flags value from this structure as that used at device
2970 		 * creation time in a VkDeviceQueueCreateInfo instance. If no
2971 		 * matching flags were specified at device creation time then
2972 		 * pQueue will return VK_NULL_HANDLE."
2973 		 */
2974 		*pQueue = VK_NULL_HANDLE;
2975 		return;
2976 	}
2977 
2978 	*pQueue = radv_queue_to_handle(queue);
2979 }
2980 
2981 void radv_GetDeviceQueue(
2982 	VkDevice                                    _device,
2983 	uint32_t                                    queueFamilyIndex,
2984 	uint32_t                                    queueIndex,
2985 	VkQueue*                                    pQueue)
2986 {
2987 	const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
2988 		.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
2989 		.queueFamilyIndex = queueFamilyIndex,
2990 		.queueIndex = queueIndex
2991 	};
2992 
2993 	radv_GetDeviceQueue2(_device, &info, pQueue);
2994 }
2995 
2996 static void
2997 fill_geom_tess_rings(struct radv_queue *queue,
2998 		     uint32_t *map,
2999 		     bool add_sample_positions,
3000 		     uint32_t esgs_ring_size,
3001 		     struct radeon_winsys_bo *esgs_ring_bo,
3002 		     uint32_t gsvs_ring_size,
3003 		     struct radeon_winsys_bo *gsvs_ring_bo,
3004 		     uint32_t tess_factor_ring_size,
3005 		     uint32_t tess_offchip_ring_offset,
3006 		     uint32_t tess_offchip_ring_size,
3007 		     struct radeon_winsys_bo *tess_rings_bo)
3008 {
3009 	uint32_t *desc = &map[4];
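	/* Each ring entry written below is a 4-dword AMD buffer resource
	 * descriptor (a "V#"), roughly laid out as:
	 *   dword 0: base address, low 32 bits
	 *   dword 1: base address high bits + stride/swizzle control
	 *   dword 2: number of records (here: ring size in bytes)
	 *   dword 3: dst_sel, format, index stride, etc. (chip dependent)
	 * This sketch is informal; the S_008F0x_* macros below encode the
	 * authoritative bit positions.
	 */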
3010 
3011 	if (esgs_ring_bo) {
3012 		uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3013 
3014 		/* stride 0, num records - size, add tid, swizzle, elsize4,
3015 		   index stride 64 */
3016 		desc[0] = esgs_va;
3017 		desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
3018 			  S_008F04_SWIZZLE_ENABLE(true);
3019 		desc[2] = esgs_ring_size;
3020 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3021 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3022 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3023 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3024 			  S_008F0C_INDEX_STRIDE(3) |
3025 			  S_008F0C_ADD_TID_ENABLE(1);
3026 
3027 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3028 			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3029 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3030 				   S_008F0C_RESOURCE_LEVEL(1);
3031 		} else {
3032 			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3033 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
3034 				   S_008F0C_ELEMENT_SIZE(1);
3035 		}
3036 
3037 		/* GS entry for ES->GS ring */
3038 		/* stride 0, num records - size, elsize0,
3039 		   index stride 0 */
3040 		desc[4] = esgs_va;
3041 		desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3042 		desc[6] = esgs_ring_size;
3043 		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3044 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3045 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3046 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3047 
3048 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3049 			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3050 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3051 				   S_008F0C_RESOURCE_LEVEL(1);
3052 		} else {
3053 			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3054 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3055 		}
3056 	}
3057 
3058 	desc += 8;
3059 
3060 	if (gsvs_ring_bo) {
3061 		uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3062 
3063 		/* VS entry for GS->VS ring */
3064 		/* stride 0, num records - size, elsize0,
3065 		   index stride 0 */
3066 		desc[0] = gsvs_va;
3067 		desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3068 		desc[2] = gsvs_ring_size;
3069 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3070 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3071 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3072 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3073 
3074 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3075 			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3076 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3077 				   S_008F0C_RESOURCE_LEVEL(1);
3078 		} else {
3079 			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3080 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3081 		}
3082 
3083 		/* stride gsvs_itemsize, num records 64
3084 		   elsize 4, index stride 16 */
3085 		/* shader will patch stride and desc[2] */
3086 		desc[4] = gsvs_va;
3087 		desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
3088 			  S_008F04_SWIZZLE_ENABLE(1);
3089 		desc[6] = 0;
3090 		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3091 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3092 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3093 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3094 			  S_008F0C_INDEX_STRIDE(1) |
3095 			  S_008F0C_ADD_TID_ENABLE(true);
3096 
3097 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3098 			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3099 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3100 				   S_008F0C_RESOURCE_LEVEL(1);
3101 		} else {
3102 			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3103 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
3104 				   S_008F0C_ELEMENT_SIZE(1);
3105 		}
3106 
3107 	}
3108 
3109 	desc += 8;
3110 
3111 	if (tess_rings_bo) {
3112 		uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3113 		uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
3114 
3115 		desc[0] = tess_va;
3116 		desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3117 		desc[2] = tess_factor_ring_size;
3118 		desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3119 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3120 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3121 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3122 
3123 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3124 			desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3125 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
3126 				   S_008F0C_RESOURCE_LEVEL(1);
3127 		} else {
3128 			desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3129 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3130 		}
3131 
3132 		desc[4] = tess_offchip_va;
3133 		desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3134 		desc[6] = tess_offchip_ring_size;
3135 		desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3136 			  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3137 			  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3138 			  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3139 
3140 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3141 			desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3142 				   S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
3143 				   S_008F0C_RESOURCE_LEVEL(1);
3144 		} else {
3145 			desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3146 				   S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3147 		}
3148 	}
3149 
3150 	desc += 8;
3151 
3152 	if (add_sample_positions) {
3153 		/* add sample positions after all rings */
3154 		memcpy(desc, queue->device->sample_locations_1x, 8);
3155 		desc += 2;
3156 		memcpy(desc, queue->device->sample_locations_2x, 16);
3157 		desc += 4;
3158 		memcpy(desc, queue->device->sample_locations_4x, 32);
3159 		desc += 8;
3160 		memcpy(desc, queue->device->sample_locations_8x, 64);
3161 	}
3162 }
3163 
3164 static unsigned
3165 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
3166 {
3167 	bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
3168 		device->physical_device->rad_info.family != CHIP_CARRIZO &&
3169 		device->physical_device->rad_info.family != CHIP_STONEY;
3170 	unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
3171 	unsigned max_offchip_buffers;
3172 	unsigned offchip_granularity;
3173 	unsigned hs_offchip_param;
3174 
3175 	/*
3176 	 * Per RadeonSI:
3177 	 * This must be one less than the maximum number due to a hw limitation.
3178 	 * Various hardware bugs need this.
3179 	 *
3180 	 * Per AMDVLK:
3181 	 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
3182 	 * Gfx7 should limit max_offchip_buffers to 508
3183 	 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
3184 	 *
3185 	 * Follow AMDVLK here.
3186 	 */
3187 	if (device->physical_device->rad_info.chip_class >= GFX10) {
3188 		max_offchip_buffers_per_se = 256;
3189 	} else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
3190 		   device->physical_device->rad_info.chip_class == GFX7 ||
3191 		   device->physical_device->rad_info.chip_class == GFX6)
3192 		--max_offchip_buffers_per_se;
3193 
3194 	max_offchip_buffers = max_offchip_buffers_per_se *
3195 		device->physical_device->rad_info.max_se;
3196 
3197 	/* Hawaii has a bug with offchip buffers > 256 that can be worked
3198 	 * around by setting 4K granularity.
3199 	 */
3200 	if (device->tess_offchip_block_dw_size == 4096) {
3201 		assert(device->physical_device->rad_info.family == CHIP_HAWAII);
3202 		offchip_granularity = V_03093C_X_4K_DWORDS;
3203 	} else {
3204 		assert(device->tess_offchip_block_dw_size == 8192);
3205 		offchip_granularity = V_03093C_X_8K_DWORDS;
3206 	}
3207 
3208 	switch (device->physical_device->rad_info.chip_class) {
3209 	case GFX6:
3210 		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
3211 		break;
3212 	case GFX7:
3213 	case GFX8:
3214 	case GFX9:
3215 		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
3216 		break;
3217 	case GFX10:
3218 		break;
3219 	default:
3220 		break;
3221 	}
3222 
3223 	*max_offchip_buffers_p = max_offchip_buffers;
3224 	if (device->physical_device->rad_info.chip_class >= GFX10_3) {
3225 		hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
3226 				   S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
3227 	} else if (device->physical_device->rad_info.chip_class >= GFX7) {
3228 		if (device->physical_device->rad_info.chip_class >= GFX8)
3229 			--max_offchip_buffers;
3230 		hs_offchip_param =
3231 			S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
3232 			S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
3233 	} else {
3234 		hs_offchip_param =
3235 			S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
3236 	}
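	/* Worked example (assumed configuration): a Vega10 part with 4 SEs
	 * decrements max_offchip_buffers_per_se to 127, giving 127 * 4 = 508
	 * buffers; since GFX8+ subtracts one more before encoding,
	 * OFFCHIP_BUFFERING is programmed with 507 while 508 is reported to
	 * the caller.
	 */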
3237 	return hs_offchip_param;
3238 }
3239 
3240 static void
3241 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3242 			struct radeon_winsys_bo *esgs_ring_bo,
3243 			uint32_t esgs_ring_size,
3244 			struct radeon_winsys_bo *gsvs_ring_bo,
3245 			uint32_t gsvs_ring_size)
3246 {
3247 	if (!esgs_ring_bo && !gsvs_ring_bo)
3248 		return;
3249 
3250 	if (esgs_ring_bo)
3251 		radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
3252 
3253 	if (gsvs_ring_bo)
3254 		radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
3255 
3256 	if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3257 		radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3258 		radeon_emit(cs, esgs_ring_size >> 8);
3259 		radeon_emit(cs, gsvs_ring_size >> 8);
3260 	} else {
3261 		radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3262 		radeon_emit(cs, esgs_ring_size >> 8);
3263 		radeon_emit(cs, gsvs_ring_size >> 8);
3264 	}
3265 }
3266 
3267 static void
3268 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3269 			   unsigned hs_offchip_param, unsigned tf_ring_size,
3270 			   struct radeon_winsys_bo *tess_rings_bo)
3271 {
3272 	uint64_t tf_va;
3273 
3274 	if (!tess_rings_bo)
3275 		return;
3276 
3277 	tf_va = radv_buffer_get_va(tess_rings_bo);
3278 
3279 	radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
3280 
3281 	if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3282 		radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
3283 				       S_030938_SIZE(tf_ring_size / 4));
3284 		radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
3285 				       tf_va >> 8);
3286 
3287 		if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3288 			radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
3289 					       S_030984_BASE_HI(tf_va >> 40));
3290 		} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3291 			radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
3292 					       S_030944_BASE_HI(tf_va >> 40));
3293 		}
3294 		radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
3295 				       hs_offchip_param);
3296 	} else {
3297 		radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
3298 				      S_008988_SIZE(tf_ring_size / 4));
3299 		radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
3300 				      tf_va >> 8);
3301 		radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
3302 				     hs_offchip_param);
3303 	}
3304 }
3305 
3306 static void
3307 radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3308                            uint32_t size_per_wave, uint32_t waves,
3309                            struct radeon_winsys_bo *scratch_bo)
3310 {
3311 	if (queue->queue_family_index != RADV_QUEUE_GENERAL)
3312 		return;
3313 
3314 	if (!scratch_bo)
3315 		return;
3316 
3317 	radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3318 
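	/* SPI_TMPRING_SIZE.WAVESIZE appears to be programmed in 1024-byte
	 * granules (an assumption inferred from the round-up below), so the
	 * per-wave scratch byte size is converted with a round-up division.
	 */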
3319 	radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
3320 	                       S_0286E8_WAVES(waves) |
3321 	                       S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3322 }
3323 
3324 static void
3325 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3326                           uint32_t size_per_wave, uint32_t waves,
3327                           struct radeon_winsys_bo *compute_scratch_bo)
3328 {
3329 	uint64_t scratch_va;
3330 
3331 	if (!compute_scratch_bo)
3332 		return;
3333 
3334 	scratch_va = radv_buffer_get_va(compute_scratch_bo);
3335 
3336 	radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
3337 
3338 	radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
3339 	radeon_emit(cs, scratch_va);
3340 	radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3341 			S_008F04_SWIZZLE_ENABLE(1));
3342 
3343 	radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
3344 	                 S_00B860_WAVES(waves) |
3345 	                 S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3346 }
3347 
3348 static void
3349 radv_emit_global_shader_pointers(struct radv_queue *queue,
3350 				 struct radeon_cmdbuf *cs,
3351 				 struct radeon_winsys_bo *descriptor_bo)
3352 {
3353 	uint64_t va;
3354 
3355 	if (!descriptor_bo)
3356 		return;
3357 
3358 	va = radv_buffer_get_va(descriptor_bo);
3359 
3360 	radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
3361 
3362 	if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3363 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3364 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
3365 				   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3366 				   R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3367 
3368 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3369 			radv_emit_shader_pointer(queue->device, cs, regs[i],
3370 						 va, true);
3371 		}
3372 	} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3373 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3374 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
3375 				   R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3376 				   R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3377 
3378 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3379 			radv_emit_shader_pointer(queue->device, cs, regs[i],
3380 						 va, true);
3381 		}
3382 	} else {
3383 		uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3384 				   R_00B130_SPI_SHADER_USER_DATA_VS_0,
3385 				   R_00B230_SPI_SHADER_USER_DATA_GS_0,
3386 				   R_00B330_SPI_SHADER_USER_DATA_ES_0,
3387 				   R_00B430_SPI_SHADER_USER_DATA_HS_0,
3388 				   R_00B530_SPI_SHADER_USER_DATA_LS_0};
3389 
3390 		for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3391 			radv_emit_shader_pointer(queue->device, cs, regs[i],
3392 						 va, true);
3393 		}
3394 	}
3395 }
3396 
3397 static void
3398 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3399 {
3400 	struct radv_device *device = queue->device;
3401 
3402 	if (device->gfx_init) {
3403 		uint64_t va = radv_buffer_get_va(device->gfx_init);
3404 
3405 		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
3406 		radeon_emit(cs, va);
3407 		radeon_emit(cs, va >> 32);
3408 		radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
3409 
3410 		radv_cs_add_buffer(device->ws, cs, device->gfx_init);
3411 	} else {
3412 		si_emit_graphics(device, cs);
3413 	}
3414 }
3415 
3416 static void
3417 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3418 {
3419 	si_emit_compute(queue->device, cs);
3420 }
3421 
3422 static VkResult
3423 radv_get_preamble_cs(struct radv_queue *queue,
3424 		     uint32_t scratch_size_per_wave,
3425 		     uint32_t scratch_waves,
3426 		     uint32_t compute_scratch_size_per_wave,
3427 		     uint32_t compute_scratch_waves,
3428 		     uint32_t esgs_ring_size,
3429 		     uint32_t gsvs_ring_size,
3430 		     bool needs_tess_rings,
3431 		     bool needs_gds,
3432 		     bool needs_gds_oa,
3433 		     bool needs_sample_positions,
3434 		     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
3435                      struct radeon_cmdbuf **initial_preamble_cs,
3436                      struct radeon_cmdbuf **continue_preamble_cs)
3437 {
3438 	struct radeon_winsys_bo *scratch_bo = NULL;
3439 	struct radeon_winsys_bo *descriptor_bo = NULL;
3440 	struct radeon_winsys_bo *compute_scratch_bo = NULL;
3441 	struct radeon_winsys_bo *esgs_ring_bo = NULL;
3442 	struct radeon_winsys_bo *gsvs_ring_bo = NULL;
3443 	struct radeon_winsys_bo *tess_rings_bo = NULL;
3444 	struct radeon_winsys_bo *gds_bo = NULL;
3445 	struct radeon_winsys_bo *gds_oa_bo = NULL;
3446 	struct radeon_cmdbuf *dest_cs[3] = {0};
3447 	bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
3448 	unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
3449 	unsigned max_offchip_buffers;
3450 	unsigned hs_offchip_param = 0;
3451 	unsigned tess_offchip_ring_offset;
3452 	uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
3453 	if (!queue->has_tess_rings) {
3454 		if (needs_tess_rings)
3455 			add_tess_rings = true;
3456 	}
3457 	if (!queue->has_gds) {
3458 		if (needs_gds)
3459 			add_gds = true;
3460 	}
3461 	if (!queue->has_gds_oa) {
3462 		if (needs_gds_oa)
3463 			add_gds_oa = true;
3464 	}
3465 	if (!queue->has_sample_positions) {
3466 		if (needs_sample_positions)
3467 			add_sample_positions = true;
3468 	}
3469 	tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
3470 	hs_offchip_param = radv_get_hs_offchip_param(queue->device,
3471 						     &max_offchip_buffers);
3472 	tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
3473 	tess_offchip_ring_size = max_offchip_buffers *
3474 		queue->device->tess_offchip_block_dw_size * 4;
3475 
3476 	scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
3477 	if (scratch_size_per_wave)
3478 		scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
3479 	else
3480 		scratch_waves = 0;
3481 
3482 	compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
3483 	if (compute_scratch_size_per_wave)
3484 		compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
3485 	else
3486 		compute_scratch_waves = 0;
3487 
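	/* Fast path: if nothing needs to grow and no new rings, GDS buffers or
	 * sample positions are required, reuse the preamble command streams
	 * already cached on the queue.
	 */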
3488 	if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
3489 	    scratch_waves <= queue->scratch_waves &&
3490 	    compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
3491 	    compute_scratch_waves <= queue->compute_scratch_waves &&
3492 	    esgs_ring_size <= queue->esgs_ring_size &&
3493 	    gsvs_ring_size <= queue->gsvs_ring_size &&
3494 	    !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
3495 	    queue->initial_preamble_cs) {
3496 		*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3497 		*initial_preamble_cs = queue->initial_preamble_cs;
3498 		*continue_preamble_cs = queue->continue_preamble_cs;
3499 		if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
3500 		    !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
3501 		    !needs_gds && !needs_gds_oa && !needs_sample_positions)
3502 			*continue_preamble_cs = NULL;
3503 		return VK_SUCCESS;
3504 	}
3505 
3506 	uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
3507 	uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
3508 	if (scratch_size > queue_scratch_size) {
3509 		scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
3510 		                                              scratch_size,
3511 		                                              4096,
3512 		                                              RADEON_DOMAIN_VRAM,
3513 		                                              ring_bo_flags,
3514 		                                              RADV_BO_PRIORITY_SCRATCH);
3515 		if (!scratch_bo)
3516 			goto fail;
3517 	} else
3518 		scratch_bo = queue->scratch_bo;
3519 
3520 	uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
3521 	uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
3522 	if (compute_scratch_size > compute_queue_scratch_size) {
3523 		compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
3524 		                                                      compute_scratch_size,
3525 		                                                      4096,
3526 		                                                      RADEON_DOMAIN_VRAM,
3527 		                                                      ring_bo_flags,
3528 		                                                      RADV_BO_PRIORITY_SCRATCH);
3529 		if (!compute_scratch_bo)
3530 			goto fail;
3531 
3532 	} else
3533 		compute_scratch_bo = queue->compute_scratch_bo;
3534 
3535 	if (esgs_ring_size > queue->esgs_ring_size) {
3536 		esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
3537 								esgs_ring_size,
3538 								4096,
3539 								RADEON_DOMAIN_VRAM,
3540 								ring_bo_flags,
3541 								RADV_BO_PRIORITY_SCRATCH);
3542 		if (!esgs_ring_bo)
3543 			goto fail;
3544 	} else {
3545 		esgs_ring_bo = queue->esgs_ring_bo;
3546 		esgs_ring_size = queue->esgs_ring_size;
3547 	}
3548 
3549 	if (gsvs_ring_size > queue->gsvs_ring_size) {
3550 		gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
3551 								gsvs_ring_size,
3552 								4096,
3553 								RADEON_DOMAIN_VRAM,
3554 								ring_bo_flags,
3555 								RADV_BO_PRIORITY_SCRATCH);
3556 		if (!gsvs_ring_bo)
3557 			goto fail;
3558 	} else {
3559 		gsvs_ring_bo = queue->gsvs_ring_bo;
3560 		gsvs_ring_size = queue->gsvs_ring_size;
3561 	}
3562 
3563 	if (add_tess_rings) {
3564 		tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
3565 								 tess_offchip_ring_offset + tess_offchip_ring_size,
3566 								 256,
3567 								 RADEON_DOMAIN_VRAM,
3568 								 ring_bo_flags,
3569 								 RADV_BO_PRIORITY_SCRATCH);
3570 		if (!tess_rings_bo)
3571 			goto fail;
3572 	} else {
3573 		tess_rings_bo = queue->tess_rings_bo;
3574 	}
3575 
3576 	if (add_gds) {
3577 		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3578 
3579 		/* 4 streamout GDS counters.
3580 		 * We need 256B (64 dw) of GDS, otherwise streamout hangs.
3581 		 */
3582 		gds_bo = queue->device->ws->buffer_create(queue->device->ws,
3583 							  256, 4,
3584 							  RADEON_DOMAIN_GDS,
3585 							  ring_bo_flags,
3586 							  RADV_BO_PRIORITY_SCRATCH);
3587 		if (!gds_bo)
3588 			goto fail;
3589 	} else {
3590 		gds_bo = queue->gds_bo;
3591 	}
3592 
3593 	if (add_gds_oa) {
3594 		assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3595 
3596 		gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
3597 							     4, 1,
3598 							     RADEON_DOMAIN_OA,
3599 							     ring_bo_flags,
3600 							     RADV_BO_PRIORITY_SCRATCH);
3601 		if (!gds_oa_bo)
3602 			goto fail;
3603 	} else {
3604 		gds_oa_bo = queue->gds_oa_bo;
3605 	}
3606 
3607 	if (scratch_bo != queue->scratch_bo ||
3608 	    esgs_ring_bo != queue->esgs_ring_bo ||
3609 	    gsvs_ring_bo != queue->gsvs_ring_bo ||
3610 	    tess_rings_bo != queue->tess_rings_bo ||
3611 	    add_sample_positions) {
3612 		uint32_t size = 0;
3613 		if (gsvs_ring_bo || esgs_ring_bo ||
3614 		    tess_rings_bo || add_sample_positions) {
3615 			size = 112; /* 2 dwords (scratch) + 2 padding + 6 descriptors * 4 dwords = 112 bytes */
3616 			if (add_sample_positions)
3617 				size += 128; /* sample positions: 64+32+16+8 = 120 bytes, padded to 128 */
3618 		}
3619 		else if (scratch_bo)
3620 			size = 8; /* 2 dword */
3621 
3622 		descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
3623 		                                                 size,
3624 		                                                 4096,
3625 		                                                 RADEON_DOMAIN_VRAM,
3626 		                                                 RADEON_FLAG_CPU_ACCESS |
3627 								 RADEON_FLAG_NO_INTERPROCESS_SHARING |
3628 								 RADEON_FLAG_READ_ONLY,
3629 								 RADV_BO_PRIORITY_DESCRIPTOR);
3630 		if (!descriptor_bo)
3631 			goto fail;
3632 	} else
3633 		descriptor_bo = queue->descriptor_bo;
3634 
3635 	if (descriptor_bo != queue->descriptor_bo) {
3636 		uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
3637 		if (!map)
3638 			goto fail;
3639 
3640 		if (scratch_bo) {
3641 			uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
3642 			uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3643 				         S_008F04_SWIZZLE_ENABLE(1);
3644 			map[0] = scratch_va;
3645 			map[1] = rsrc1;
3646 		}
3647 
3648 		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
3649 			fill_geom_tess_rings(queue, map, add_sample_positions,
3650 					     esgs_ring_size, esgs_ring_bo,
3651 					     gsvs_ring_size, gsvs_ring_bo,
3652 					     tess_factor_ring_size,
3653 					     tess_offchip_ring_offset,
3654 					     tess_offchip_ring_size,
3655 					     tess_rings_bo);
3656 
3657 		queue->device->ws->buffer_unmap(descriptor_bo);
3658 	}
3659 
3660 	for(int i = 0; i < 3; ++i) {
3661 		struct radeon_cmdbuf *cs = NULL;
3662 		cs = queue->device->ws->cs_create(queue->device->ws,
3663 						  queue->queue_family_index ? RING_COMPUTE : RING_GFX);
3664 		if (!cs)
3665 			goto fail;
3666 
3667 		dest_cs[i] = cs;
3668 
3669 		if (scratch_bo)
3670 			radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3671 
3672 		/* Emit initial configuration. */
3673 		switch (queue->queue_family_index) {
3674 		case RADV_QUEUE_GENERAL:
3675 			radv_init_graphics_state(cs, queue);
3676 			break;
3677 		case RADV_QUEUE_COMPUTE:
3678 			radv_init_compute_state(cs, queue);
3679 			break;
3680 		case RADV_QUEUE_TRANSFER:
3681 			break;
3682 		}
3683 
3684 		if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo)  {
3685 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3686 			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
3687 
3688 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3689 			radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
3690 		}
3691 
3692 		radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
3693 					gsvs_ring_bo, gsvs_ring_size);
3694 		radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
3695 					   tess_factor_ring_size, tess_rings_bo);
3696 		radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
3697 		radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
3698 		                          compute_scratch_waves, compute_scratch_bo);
3699 		radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
3700 		                           scratch_waves, scratch_bo);
3701 
3702 		if (gds_bo)
3703 			radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
3704 		if (gds_oa_bo)
3705 			radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
3706 
3707 		if (queue->device->trace_bo)
3708 			radv_cs_add_buffer(queue->device->ws, cs, queue->device->trace_bo);
3709 
3710 		if (queue->device->border_color_data.bo)
3711 			radv_cs_add_buffer(queue->device->ws, cs,
3712 					   queue->device->border_color_data.bo);
3713 
3714 		if (i == 0) {
3715 			si_cs_emit_cache_flush(cs,
3716 			                       queue->device->physical_device->rad_info.chip_class,
3717 					       NULL, 0,
3718 			                       queue->queue_family_index == RING_COMPUTE &&
3719 			                         queue->device->physical_device->rad_info.chip_class >= GFX7,
3720 			                       (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
3721 			                       RADV_CMD_FLAG_INV_ICACHE |
3722 			                       RADV_CMD_FLAG_INV_SCACHE |
3723 			                       RADV_CMD_FLAG_INV_VCACHE |
3724 			                       RADV_CMD_FLAG_INV_L2 |
3725 					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
3726 		} else if (i == 1) {
3727 			si_cs_emit_cache_flush(cs,
3728 			                       queue->device->physical_device->rad_info.chip_class,
3729 					       NULL, 0,
3730 			                       queue->queue_family_index == RING_COMPUTE &&
3731 			                         queue->device->physical_device->rad_info.chip_class >= GFX7,
3732 			                       RADV_CMD_FLAG_INV_ICACHE |
3733 			                       RADV_CMD_FLAG_INV_SCACHE |
3734 			                       RADV_CMD_FLAG_INV_VCACHE |
3735 			                       RADV_CMD_FLAG_INV_L2 |
3736 					       RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
3737 		}
3738 
3739 		if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
3740 			goto fail;
3741 	}
3742 
3743 	if (queue->initial_full_flush_preamble_cs)
3744 		queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
3745 
3746 	if (queue->initial_preamble_cs)
3747 		queue->device->ws->cs_destroy(queue->initial_preamble_cs);
3748 
3749 	if (queue->continue_preamble_cs)
3750 		queue->device->ws->cs_destroy(queue->continue_preamble_cs);
3751 
3752 	queue->initial_full_flush_preamble_cs = dest_cs[0];
3753 	queue->initial_preamble_cs = dest_cs[1];
3754 	queue->continue_preamble_cs = dest_cs[2];
3755 
3756 	if (scratch_bo != queue->scratch_bo) {
3757 		if (queue->scratch_bo)
3758 			queue->device->ws->buffer_destroy(queue->scratch_bo);
3759 		queue->scratch_bo = scratch_bo;
3760 	}
3761 	queue->scratch_size_per_wave = scratch_size_per_wave;
3762 	queue->scratch_waves = scratch_waves;
3763 
3764 	if (compute_scratch_bo != queue->compute_scratch_bo) {
3765 		if (queue->compute_scratch_bo)
3766 			queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
3767 		queue->compute_scratch_bo = compute_scratch_bo;
3768 	}
3769 	queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
3770 	queue->compute_scratch_waves = compute_scratch_waves;
3771 
3772 	if (esgs_ring_bo != queue->esgs_ring_bo) {
3773 		if (queue->esgs_ring_bo)
3774 			queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
3775 		queue->esgs_ring_bo = esgs_ring_bo;
3776 		queue->esgs_ring_size = esgs_ring_size;
3777 	}
3778 
3779 	if (gsvs_ring_bo != queue->gsvs_ring_bo) {
3780 		if (queue->gsvs_ring_bo)
3781 			queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
3782 		queue->gsvs_ring_bo = gsvs_ring_bo;
3783 		queue->gsvs_ring_size = gsvs_ring_size;
3784 	}
3785 
3786 	if (tess_rings_bo != queue->tess_rings_bo) {
3787 		queue->tess_rings_bo = tess_rings_bo;
3788 		queue->has_tess_rings = true;
3789 	}
3790 
3791 	if (gds_bo != queue->gds_bo) {
3792 		queue->gds_bo = gds_bo;
3793 		queue->has_gds = true;
3794 	}
3795 
3796 	if (gds_oa_bo != queue->gds_oa_bo) {
3797 		queue->gds_oa_bo = gds_oa_bo;
3798 		queue->has_gds_oa = true;
3799 	}
3800 
3801 	if (descriptor_bo != queue->descriptor_bo) {
3802 		if (queue->descriptor_bo)
3803 			queue->device->ws->buffer_destroy(queue->descriptor_bo);
3804 
3805 		queue->descriptor_bo = descriptor_bo;
3806 	}
3807 
3808 	if (add_sample_positions)
3809 		queue->has_sample_positions = true;
3810 
3811 	*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3812 	*initial_preamble_cs = queue->initial_preamble_cs;
3813 	*continue_preamble_cs = queue->continue_preamble_cs;
3814 	if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
3815 		*continue_preamble_cs = NULL;
3816 	return VK_SUCCESS;
3817 fail:
3818 	for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
3819 		if (dest_cs[i])
3820 			queue->device->ws->cs_destroy(dest_cs[i]);
3821 	if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
3822 		queue->device->ws->buffer_destroy(descriptor_bo);
3823 	if (scratch_bo && scratch_bo != queue->scratch_bo)
3824 		queue->device->ws->buffer_destroy(scratch_bo);
3825 	if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
3826 		queue->device->ws->buffer_destroy(compute_scratch_bo);
3827 	if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
3828 		queue->device->ws->buffer_destroy(esgs_ring_bo);
3829 	if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
3830 		queue->device->ws->buffer_destroy(gsvs_ring_bo);
3831 	if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
3832 		queue->device->ws->buffer_destroy(tess_rings_bo);
3833 	if (gds_bo && gds_bo != queue->gds_bo)
3834 		queue->device->ws->buffer_destroy(gds_bo);
3835 	if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
3836 		queue->device->ws->buffer_destroy(gds_oa_bo);
3837 
3838 	return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3839 }
3840 
3841 static VkResult radv_alloc_sem_counts(struct radv_device *device,
3842 				      struct radv_winsys_sem_counts *counts,
3843 				      int num_sems,
3844 				      struct radv_semaphore_part **sems,
3845 				      const uint64_t *timeline_values,
3846 				      VkFence _fence,
3847 				      bool is_signal)
3848 {
3849 	int syncobj_idx = 0, non_reset_idx = 0, sem_idx = 0, timeline_idx = 0;
3850 
3851 	if (num_sems == 0 && _fence == VK_NULL_HANDLE)
3852 		return VK_SUCCESS;
3853 
3854 	for (uint32_t i = 0; i < num_sems; i++) {
3855 		switch(sems[i]->kind) {
3856 		case RADV_SEMAPHORE_SYNCOBJ:
3857 			counts->syncobj_count++;
3858 			counts->syncobj_reset_count++;
3859 			break;
3860 		case RADV_SEMAPHORE_WINSYS:
3861 			counts->sem_count++;
3862 			break;
3863 		case RADV_SEMAPHORE_NONE:
3864 			break;
3865 		case RADV_SEMAPHORE_TIMELINE:
3866 			counts->syncobj_count++;
3867 			break;
3868 		case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
3869 			counts->timeline_syncobj_count++;
3870 			break;
3871 		}
3872 	}
3873 
3874 	if (_fence != VK_NULL_HANDLE) {
3875 		RADV_FROM_HANDLE(radv_fence, fence, _fence);
3876 
3877 		struct radv_fence_part *part =
3878 			fence->temporary.kind != RADV_FENCE_NONE ?
3879 			&fence->temporary : &fence->permanent;
3880 		if (part->kind == RADV_FENCE_SYNCOBJ)
3881 			counts->syncobj_count++;
3882 	}
3883 
3884 	if (counts->syncobj_count || counts->timeline_syncobj_count) {
3885 		counts->points = (uint64_t *)malloc(
3886 			sizeof(*counts->syncobj) * counts->syncobj_count +
3887 			(sizeof(*counts->syncobj) + sizeof(*counts->points)) * counts->timeline_syncobj_count);
3888 		if (!counts->points)
3889 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3890 		counts->syncobj = (uint32_t*)(counts->points + counts->timeline_syncobj_count);
3891 	}
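	/* The single allocation above packs two arrays back to back:
	 *   [ points:  timeline_syncobj_count x uint64_t ]
	 *   [ syncobj: (syncobj_count + timeline_syncobj_count) x uint32_t ]
	 * counts->points is the base pointer, which is why only the points
	 * pointer is ever passed to free().
	 */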
3892 
3893 	if (counts->sem_count) {
3894 		counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
3895 		if (!counts->sem) {
3896 			free(counts->points);
3897 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3898 		}
3899 	}
3900 
3901 	non_reset_idx = counts->syncobj_reset_count;
3902 
3903 	for (uint32_t i = 0; i < num_sems; i++) {
3904 		switch(sems[i]->kind) {
3905 		case RADV_SEMAPHORE_NONE:
3906 			unreachable("Empty semaphore");
3907 			break;
3908 		case RADV_SEMAPHORE_SYNCOBJ:
3909 			counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
3910 			break;
3911 		case RADV_SEMAPHORE_WINSYS:
3912 			counts->sem[sem_idx++] = sems[i]->ws_sem;
3913 			break;
3914 		case RADV_SEMAPHORE_TIMELINE: {
3915 			pthread_mutex_lock(&sems[i]->timeline.mutex);
3916 			struct radv_timeline_point *point = NULL;
3917 			if (is_signal) {
3918 				point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
3919 			} else {
3920 				point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
3921 			}
3922 
3923 			pthread_mutex_unlock(&sems[i]->timeline.mutex);
3924 
3925 			if (point) {
3926 				counts->syncobj[non_reset_idx++] = point->syncobj;
3927 			} else {
3928 				/* Explicitly clear the semaphore so we don't
3929 				 * try to look up a point for it again post-submit. */
3930 				sems[i] = NULL;
3931 			}
3932 			break;
3933 		}
3934 		case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
3935 			counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
3936 			counts->points[timeline_idx] = timeline_values[i];
3937 			++timeline_idx;
3938 			break;
3939 		}
3940 	}
3941 
3942 	if (_fence != VK_NULL_HANDLE) {
3943 		RADV_FROM_HANDLE(radv_fence, fence, _fence);
3944 
3945 		struct radv_fence_part *part =
3946 			fence->temporary.kind != RADV_FENCE_NONE ?
3947 			&fence->temporary : &fence->permanent;
3948 		if (part->kind == RADV_FENCE_SYNCOBJ)
3949 			counts->syncobj[non_reset_idx++] = part->syncobj;
3950 	}
3951 
3952 	assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
3953 	counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
3954 
3955 	return VK_SUCCESS;
3956 }
3957 
3958 static void
3959 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
3960 {
3961 	free(sem_info->wait.points);
3962 	free(sem_info->wait.sem);
3963 	free(sem_info->signal.points);
3964 	free(sem_info->signal.sem);
3965 }
3966 
3967 
3968 static void radv_free_temp_syncobjs(struct radv_device *device,
3969 				    int num_sems,
3970 				    struct radv_semaphore_part *sems)
3971 {
3972 	for (uint32_t i = 0; i < num_sems; i++) {
3973 		radv_destroy_semaphore_part(device, sems + i);
3974 	}
3975 }
3976 
3977 static VkResult
3978 radv_alloc_sem_info(struct radv_device *device,
3979 		    struct radv_winsys_sem_info *sem_info,
3980 		    int num_wait_sems,
3981 		    struct radv_semaphore_part **wait_sems,
3982 		    const uint64_t *wait_values,
3983 		    int num_signal_sems,
3984 		    struct radv_semaphore_part **signal_sems,
3985 		    const uint64_t *signal_values,
3986 		    VkFence fence)
3987 {
3988 	VkResult ret;
3989 	memset(sem_info, 0, sizeof(*sem_info));
3990 
3991 	ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
3992 	if (ret)
3993 		return ret;
3994 	ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
3995 	if (ret)
3996 		radv_free_sem_info(sem_info);
3997 
3998 	/* caller can override these */
3999 	sem_info->cs_emit_wait = true;
4000 	sem_info->cs_emit_signal = true;
4001 	return ret;
4002 }
4003 
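/* Post-submit timeline bookkeeping: drop the references taken on CPU timeline
 * points when the submission was built, advance highest_submitted / max_point,
 * and wake any submissions waiting on the newly reachable values. */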
4004 static void
4005 radv_finalize_timelines(struct radv_device *device,
4006                         uint32_t num_wait_sems,
4007                         struct radv_semaphore_part **wait_sems,
4008                         const uint64_t *wait_values,
4009                         uint32_t num_signal_sems,
4010                         struct radv_semaphore_part **signal_sems,
4011                         const uint64_t *signal_values,
4012                         struct list_head *processing_list)
4013 {
4014 	for (uint32_t i = 0; i < num_wait_sems; ++i) {
4015 		if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4016 			pthread_mutex_lock(&wait_sems[i]->timeline.mutex);
4017 			struct radv_timeline_point *point =
4018 				radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
4019 			point->wait_count -= 2;
4020 			pthread_mutex_unlock(&wait_sems[i]->timeline.mutex);
4021 		}
4022 	}
4023 	for (uint32_t i = 0; i < num_signal_sems; ++i) {
4024 		if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4025 			pthread_mutex_lock(&signal_sems[i]->timeline.mutex);
4026 			struct radv_timeline_point *point =
4027 				radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
4028 			signal_sems[i]->timeline.highest_submitted =
4029 				MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
4030 			point->wait_count -= 2;
4031 			radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
4032 			pthread_mutex_unlock(&signal_sems[i]->timeline.mutex);
4033 		} else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
4034 			signal_sems[i]->timeline_syncobj.max_point =
4035 				MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
4036 		}
4037 	}
4038 }
4039 
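/* Apply the sparse binds for a buffer: each range of the buffer's virtual
 * address space is remapped to the given memory, or unmapped when the bind
 * carries no memory. */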
4040 static VkResult
4041 radv_sparse_buffer_bind_memory(struct radv_device *device,
4042                                const VkSparseBufferMemoryBindInfo *bind)
4043 {
4044 	RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4045 	VkResult result;
4046 
4047 	for (uint32_t i = 0; i < bind->bindCount; ++i) {
4048 		struct radv_device_memory *mem = NULL;
4049 
4050 		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4051 			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4052 
4053 		result = device->ws->buffer_virtual_bind(buffer->bo,
4054 							 bind->pBinds[i].resourceOffset,
4055 							 bind->pBinds[i].size,
4056 							 mem ? mem->bo : NULL,
4057 							 bind->pBinds[i].memoryOffset);
4058 		if (result != VK_SUCCESS)
4059 			return result;
4060 	}
4061 
4062 	return VK_SUCCESS;
4063 }
4064 
4065 static VkResult
4066 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4067                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
4068 {
4069 	RADV_FROM_HANDLE(radv_image, image, bind->image);
4070 	VkResult result;
4071 
4072 	for (uint32_t i = 0; i < bind->bindCount; ++i) {
4073 		struct radv_device_memory *mem = NULL;
4074 
4075 		if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4076 			mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4077 
4078 		result = device->ws->buffer_virtual_bind(image->bo,
4079 							 bind->pBinds[i].resourceOffset,
4080 							 bind->pBinds[i].size,
4081 							 mem ? mem->bo : NULL,
4082 							 bind->pBinds[i].memoryOffset);
4083 		if (result != VK_SUCCESS)
4084 			return result;
4085 	}
4086 
4087 	return VK_SUCCESS;
4088 }
4089 
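/* Scan the command buffers for their worst-case scratch, ring and GDS
 * requirements and fetch preamble command streams sized for them. */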
4090 static VkResult
4091 radv_get_preambles(struct radv_queue *queue,
4092                    const VkCommandBuffer *cmd_buffers,
4093                    uint32_t cmd_buffer_count,
4094                    struct radeon_cmdbuf **initial_full_flush_preamble_cs,
4095                    struct radeon_cmdbuf **initial_preamble_cs,
4096                    struct radeon_cmdbuf **continue_preamble_cs)
4097 {
4098 	uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
4099 	uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
4100 	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
4101 	bool tess_rings_needed = false;
4102 	bool gds_needed = false;
4103 	bool gds_oa_needed = false;
4104 	bool sample_positions_needed = false;
4105 
4106 	for (uint32_t j = 0; j < cmd_buffer_count; j++) {
4107 		RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
4108 				 cmd_buffers[j]);
4109 
4110 		scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
4111 		waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
4112 		compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
4113 		                                     cmd_buffer->compute_scratch_size_per_wave_needed);
4114 		compute_waves_wanted = MAX2(compute_waves_wanted,
4115 		                            cmd_buffer->compute_scratch_waves_wanted);
4116 		esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
4117 		gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
4118 		tess_rings_needed |= cmd_buffer->tess_rings_needed;
4119 		gds_needed |= cmd_buffer->gds_needed;
4120 		gds_oa_needed |= cmd_buffer->gds_oa_needed;
4121 		sample_positions_needed |= cmd_buffer->sample_positions_needed;
4122 	}
4123 
4124 	return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
4125 	                            compute_scratch_size_per_wave, compute_waves_wanted,
4126 	                            esgs_ring_size, gsvs_ring_size, tess_rings_needed,
4127 	                            gds_needed, gds_oa_needed, sample_positions_needed,
4128 	                            initial_full_flush_preamble_cs,
4129 	                            initial_preamble_cs, continue_preamble_cs);
4130 }
4131 
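/* A queue submission that may still be waiting on timeline semaphores. All
 * inputs are copied out of the application's structures into one allocation
 * (see radv_create_deferred_submission) so the submission can be executed
 * later, possibly from the per-queue submission thread. */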
4132 struct radv_deferred_queue_submission {
4133 	struct radv_queue *queue;
4134 	VkCommandBuffer *cmd_buffers;
4135 	uint32_t cmd_buffer_count;
4136 
4137 	/* Sparse bindings that happen on a queue. */
4138 	VkSparseBufferMemoryBindInfo *buffer_binds;
4139 	uint32_t buffer_bind_count;
4140 	VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4141 	uint32_t image_opaque_bind_count;
4142 
4143 	bool flush_caches;
4144 	VkPipelineStageFlags wait_dst_stage_mask;
4145 	struct radv_semaphore_part **wait_semaphores;
4146 	uint32_t wait_semaphore_count;
4147 	struct radv_semaphore_part **signal_semaphores;
4148 	uint32_t signal_semaphore_count;
4149 	VkFence fence;
4150 
4151 	uint64_t *wait_values;
4152 	uint64_t *signal_values;
4153 
4154 	struct radv_semaphore_part *temporary_semaphore_parts;
4155 	uint32_t temporary_semaphore_part_count;
4156 
4157 	struct list_head queue_pending_list;
4158 	uint32_t submission_wait_count;
4159 	struct radv_timeline_waiter *wait_nodes;
4160 
4161 	struct list_head processing_list;
4162 };
4163 
4164 struct radv_queue_submission {
4165 	const VkCommandBuffer *cmd_buffers;
4166 	uint32_t cmd_buffer_count;
4167 
4168 	/* Sparse bindings that happen on a queue. */
4169 	const VkSparseBufferMemoryBindInfo *buffer_binds;
4170 	uint32_t buffer_bind_count;
4171 	const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4172 	uint32_t image_opaque_bind_count;
4173 
4174 	bool flush_caches;
4175 	VkPipelineStageFlags wait_dst_stage_mask;
4176 	const VkSemaphore *wait_semaphores;
4177 	uint32_t wait_semaphore_count;
4178 	const VkSemaphore *signal_semaphores;
4179 	uint32_t signal_semaphore_count;
4180 	VkFence fence;
4181 
4182 	const uint64_t *wait_values;
4183 	uint32_t wait_value_count;
4184 	const uint64_t *signal_values;
4185 	uint32_t signal_value_count;
4186 };
4187 
4188 static VkResult
4189 radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
4190                               uint32_t decrement,
4191                               struct list_head *processing_list);
4192 
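/* Copy a submission into a single heap allocation laid out as:
 *
 *   struct radv_deferred_queue_submission
 *   VkCommandBuffer                   [cmd_buffer_count]
 *   VkSparseBufferMemoryBindInfo      [buffer_bind_count]
 *   VkSparseImageOpaqueMemoryBindInfo [image_opaque_bind_count]
 *   struct radv_semaphore_part *      [wait + signal semaphore counts]
 *   struct radv_semaphore_part        [temporary semaphore count]
 *   uint64_t                          [wait + signal value counts]
 *   struct radv_timeline_waiter       [wait_semaphore_count]
 *
 * Temporary semaphore parts are moved into the allocation so they survive
 * until the deferred submission actually runs. */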
4193 static VkResult
4194 radv_create_deferred_submission(struct radv_queue *queue,
4195                                 const struct radv_queue_submission *submission,
4196                                 struct radv_deferred_queue_submission **out)
4197 {
4198 	struct radv_deferred_queue_submission *deferred = NULL;
4199 	size_t size = sizeof(struct radv_deferred_queue_submission);
4200 
4201 	uint32_t temporary_count = 0;
4202 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4203 		RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4204 		if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
4205 			++temporary_count;
4206 	}
4207 
4208 	size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
4209 	size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
4210 	size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
4211 	size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
4212 	size += temporary_count * sizeof(struct radv_semaphore_part);
4213 	size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
4214 	size += submission->wait_value_count * sizeof(uint64_t);
4215 	size += submission->signal_value_count * sizeof(uint64_t);
4216 	size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
4217 
4218 	deferred = calloc(1, size);
4219 	if (!deferred)
4220 		return VK_ERROR_OUT_OF_HOST_MEMORY;
4221 
4222 	deferred->queue = queue;
4223 
4224 	deferred->cmd_buffers = (void*)(deferred + 1);
4225 	deferred->cmd_buffer_count = submission->cmd_buffer_count;
4226 	memcpy(deferred->cmd_buffers, submission->cmd_buffers,
4227 	       submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
4228 
4229 	deferred->buffer_binds = (void*)(deferred->cmd_buffers + submission->cmd_buffer_count);
4230 	deferred->buffer_bind_count = submission->buffer_bind_count;
4231 	memcpy(deferred->buffer_binds, submission->buffer_binds,
4232 	       submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
4233 
4234 	deferred->image_opaque_binds = (void*)(deferred->buffer_binds + submission->buffer_bind_count);
4235 	deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
4236 	memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
4237 	       submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
4238 
4239 	deferred->flush_caches = submission->flush_caches;
4240 	deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
4241 
4242 	deferred->wait_semaphores = (void*)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
4243 	deferred->wait_semaphore_count = submission->wait_semaphore_count;
4244 
4245 	deferred->signal_semaphores = (void*)(deferred->wait_semaphores + deferred->wait_semaphore_count);
4246 	deferred->signal_semaphore_count = submission->signal_semaphore_count;
4247 
4248 	deferred->fence = submission->fence;
4249 
4250 	deferred->temporary_semaphore_parts = (void*)(deferred->signal_semaphores + deferred->signal_semaphore_count);
4251 	deferred->temporary_semaphore_part_count = temporary_count;
4252 
4253 	uint32_t temporary_idx = 0;
4254 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4255 		RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4256 		if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4257 			deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
4258 			deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
4259 			semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
4260 			++temporary_idx;
4261 		} else
4262 			deferred->wait_semaphores[i] = &semaphore->permanent;
4263 	}
4264 
4265 	for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
4266 		RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
4267 		if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4268 			deferred->signal_semaphores[i] = &semaphore->temporary;
4269 		} else {
4270 			deferred->signal_semaphores[i] = &semaphore->permanent;
4271 		}
4272 	}
4273 
4274 	deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
4275 	memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
4276 	deferred->signal_values = deferred->wait_values + submission->wait_value_count;
4277 	memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
4278 
4279 	deferred->wait_nodes = (void*)(deferred->signal_values + submission->signal_value_count);
4280 	/* This is worst-case. radv_queue_enqueue_submission will fill in further, but this
4281 	 * ensures the submission is not accidentally triggered early when adding wait timelines. */
4282 	deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
4283 
4284 	*out = deferred;
4285 	return VK_SUCCESS;
4286 }
4287 
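/* Queue a deferred submission: register a waiter on every timeline semaphore
 * that has not yet reached its wait value and append the submission to the
 * queue's pending list. Waits that are already satisfied are decremented
 * right away, which may make the submission ready immediately. */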
4288 static VkResult
4289 radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
4290                               struct list_head *processing_list)
4291 {
4292 	uint32_t wait_cnt = 0;
4293 	struct radv_timeline_waiter *waiter = submission->wait_nodes;
4294 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4295 		if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4296 			pthread_mutex_lock(&submission->wait_semaphores[i]->timeline.mutex);
4297 			if (submission->wait_semaphores[i]->timeline.highest_submitted < submission->wait_values[i]) {
4298 				++wait_cnt;
4299 				waiter->value = submission->wait_values[i];
4300 				waiter->submission = submission;
4301 				list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
4302 				++waiter;
4303 			}
4304 			pthread_mutex_unlock(&submission->wait_semaphores[i]->timeline.mutex);
4305 		}
4306 	}
4307 
4308 	pthread_mutex_lock(&submission->queue->pending_mutex);
4309 
4310 	bool is_first = list_is_empty(&submission->queue->pending_submissions);
4311 	list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
4312 
4313 	pthread_mutex_unlock(&submission->queue->pending_mutex);
4314 
4315 	/* If there is already a submission in the queue, that will decrement the counter by 1 when
4316 	 * submitted, but if the queue was empty, we decrement ourselves as there is no previous
4317 	 * submission. */
4318 	uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
4319 
4320 	/* if decrement is zero, then we don't have a refcounted reference to the
4321 	 * submission anymore, so it is not safe to access the submission. */
4322 	if (!decrement)
4323 		return VK_SUCCESS;
4324 
4325 	return radv_queue_trigger_submission(submission, decrement, processing_list);
4326 }
4327 
4328 static void
4329 radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
4330                                    struct list_head *processing_list)
4331 {
4332 	pthread_mutex_lock(&submission->queue->pending_mutex);
4333 	list_del(&submission->queue_pending_list);
4334 
4335 	/* trigger the next submission in the queue. */
4336 	if (!list_is_empty(&submission->queue->pending_submissions)) {
4337 		struct radv_deferred_queue_submission *next_submission =
4338 			list_first_entry(&submission->queue->pending_submissions,
4339 			                 struct radv_deferred_queue_submission,
4340 			                 queue_pending_list);
4341 		radv_queue_trigger_submission(next_submission, 1, processing_list);
4342 	}
4343 	pthread_mutex_unlock(&submission->queue->pending_mutex);
4344 
4345 	pthread_cond_broadcast(&submission->queue->device->timeline_cond);
4346 }
4347 
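/* Execute a deferred submission on the winsys: apply sparse binds, submit the
 * command streams in chunks of at most RADV_MAX_IBS_PER_SUBMIT, then finalize
 * timelines and kick the next pending submission on this queue. */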
4348 static VkResult
4349 radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
4350                            struct list_head *processing_list)
4351 {
4352 	RADV_FROM_HANDLE(radv_fence, fence, submission->fence);
4353 	struct radv_queue *queue = submission->queue;
4354 	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4355 	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
4356 	struct radeon_winsys_fence *base_fence = NULL;
4357 	bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
4358 	bool can_patch = true;
4359 	uint32_t advance;
4360 	struct radv_winsys_sem_info sem_info;
4361 	VkResult result;
4362 	struct radeon_cmdbuf *initial_preamble_cs = NULL;
4363 	struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
4364 	struct radeon_cmdbuf *continue_preamble_cs = NULL;
4365 
4366 	if (fence) {
4367 		/* Under most circumstances, out fences won't be temporary.
4368 		 * However, the spec does allow it for opaque_fd.
4369 		 *
4370 		 * From the Vulkan 1.0.53 spec:
4371 		 *
4372 		 *    "If the import is temporary, the implementation must
4373 		 *    restore the semaphore to its prior permanent state after
4374 		 *    submitting the next semaphore wait operation."
4375 		 */
4376 		struct radv_fence_part *part =
4377 			fence->temporary.kind != RADV_FENCE_NONE ?
4378 			&fence->temporary : &fence->permanent;
4379 		if (part->kind == RADV_FENCE_WINSYS)
4380 			base_fence = part->fence;
4381 	}
4382 
4383 	result = radv_get_preambles(queue, submission->cmd_buffers,
4384 	                            submission->cmd_buffer_count,
4385 	                            &initial_preamble_cs,
4386 	                            &initial_flush_preamble_cs,
4387 	                            &continue_preamble_cs);
4388 	if (result != VK_SUCCESS)
4389 		goto fail;
4390 
4391 	result = radv_alloc_sem_info(queue->device,
4392 				     &sem_info,
4393 				     submission->wait_semaphore_count,
4394 				     submission->wait_semaphores,
4395 				     submission->wait_values,
4396 				     submission->signal_semaphore_count,
4397 				     submission->signal_semaphores,
4398 				     submission->signal_values,
4399 				     submission->fence);
4400 	if (result != VK_SUCCESS)
4401 		goto fail;
4402 
4403 	for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
4404 		result = radv_sparse_buffer_bind_memory(queue->device,
4405 							submission->buffer_binds + i);
4406 		if (result != VK_SUCCESS)
4407 			goto fail;
4408 	}
4409 
4410 	for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
4411 		result = radv_sparse_image_opaque_bind_memory(queue->device,
4412 							      submission->image_opaque_binds + i);
4413 		if (result != VK_SUCCESS)
4414 			goto fail;
4415 	}
4416 
4417 	if (!submission->cmd_buffer_count) {
4418 		result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
4419 						      &queue->device->empty_cs[queue->queue_family_index],
4420 						      1, NULL, NULL,
4421 						      &sem_info, NULL,
4422 						      false, base_fence);
4423 		if (result != VK_SUCCESS)
4424 			goto fail;
4425 	} else {
4426 		struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
4427 		                                         submission->cmd_buffer_count);
		if (!cs_array) {
			result = VK_ERROR_OUT_OF_HOST_MEMORY;
			goto fail;
		}
4428 
4429 		for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
4430 			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
4431 			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4432 
4433 			cs_array[j] = cmd_buffer->cs;
4434 			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4435 				can_patch = false;
4436 
4437 			cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4438 		}
4439 
4440 		for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
4441 			struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
4442 			const struct radv_winsys_bo_list *bo_list = NULL;
4443 
4444 			advance = MIN2(max_cs_submission,
4445 			               submission->cmd_buffer_count - j);
4446 
4447 			if (queue->device->trace_bo)
4448 				*queue->device->trace_id_ptr = 0;
4449 
4450 			sem_info.cs_emit_wait = j == 0;
4451 			sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
4452 
4453 			if (unlikely(queue->device->use_global_bo_list)) {
4454 				pthread_rwlock_rdlock(&queue->device->bo_list.rwlock);
4455 				bo_list = &queue->device->bo_list.list;
4456 			}
4457 
4458 			result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
4459 							      advance, initial_preamble, continue_preamble_cs,
4460 							      &sem_info, bo_list,
4461 							      can_patch, base_fence);
4462 
4463 			if (unlikely(queue->device->use_global_bo_list))
4464 				pthread_rwlock_unlock(&queue->device->bo_list.rwlock);
4465 
4466 			if (result != VK_SUCCESS)
4467 				goto fail;
4468 
4469 			if (queue->device->trace_bo) {
4470 				radv_check_gpu_hangs(queue, cs_array[j]);
4471 			}
4472 		}
4473 
4474 		free(cs_array);
4475 	}
4476 
4477 	radv_free_temp_syncobjs(queue->device,
4478 				submission->temporary_semaphore_part_count,
4479 				submission->temporary_semaphore_parts);
4480 	radv_finalize_timelines(queue->device,
4481 	                        submission->wait_semaphore_count,
4482 	                        submission->wait_semaphores,
4483 	                        submission->wait_values,
4484 	                        submission->signal_semaphore_count,
4485 	                        submission->signal_semaphores,
4486 	                        submission->signal_values,
4487 	                        processing_list);
4488 	/* Has to happen after timeline finalization to make sure the
4489 	 * condition variable is only triggered when timelines and queue have
4490 	 * been updated. */
4491 	radv_queue_submission_update_queue(submission, processing_list);
4492 	radv_free_sem_info(&sem_info);
4493 	free(submission);
4494 	return VK_SUCCESS;
4495 
4496 fail:
4497 	if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
4498 		/* When something bad happened during the submission, such as
4499 		 * an out of memory issue, it might be hard to recover from
4500 		 * this inconsistent state. To avoid this sort of problem, we
4501 		 * assume that we are in a really bad situation and return
4502 		 * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
4503 		 * to submit the same job again to this device.
4504 		 */
4505 		result = VK_ERROR_DEVICE_LOST;
4506 	}
4507 
4508 	radv_free_temp_syncobjs(queue->device,
4509 				submission->temporary_semaphore_part_count,
4510 				submission->temporary_semaphore_parts);
4511 	free(submission);
4512 	return result;
4513 }
4514 
4515 static VkResult
4516 radv_process_submissions(struct list_head *processing_list)
4517 {
4518 	while(!list_is_empty(processing_list)) {
4519 		struct radv_deferred_queue_submission *submission =
4520 			list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
4521 		list_del(&submission->processing_list);
4522 
4523 		VkResult result = radv_queue_submit_deferred(submission, processing_list);
4524 		if (result != VK_SUCCESS)
4525 			return result;
4526 	}
4527 	return VK_SUCCESS;
4528 }
4529 
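/* Wait (up to the absolute timeout) until every timeline-syncobj semaphore
 * this submission depends on has reached its wait value. */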
4530 static VkResult
4531 wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
4532                                         uint64_t timeout)
4533 {
4534 	struct radv_device *device = submission->queue->device;
4535 	uint32_t syncobj_count = 0;
4536 	uint32_t syncobj_idx = 0;
4537 
4538 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4539 		if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
4540 			continue;
4541 
4542 		if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
4543 			continue;
4544 		++syncobj_count;
4545 	}
4546 
4547 	if (!syncobj_count)
4548 		return VK_SUCCESS;
4549 
4550 	uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
4551 	if (!points)
4552 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4553 
4554 	uint32_t *syncobj = (uint32_t*)(points + syncobj_count);
4555 
4556 	for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4557 		if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
4558 			continue;
4559 
4560 		if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
4561 			continue;
4562 
4563 		syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
4564 		points[syncobj_idx] = submission->wait_values[i];
4565 		++syncobj_idx;
4566 	}
4567 	bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, true, timeout);
4568 
4569 	free(points);
4570 	return success ? VK_SUCCESS : VK_TIMEOUT;
4571 }
4572 
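/* Per-queue submission thread: sleeps until a submission is handed off, waits
 * for its timeline dependencies and then processes it together with anything
 * it unblocks. Started lazily by radv_queue_trigger_submission. */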
4573 static void* radv_queue_submission_thread_run(void *q)
4574 {
4575 	struct radv_queue *queue = q;
4576 
4577 	pthread_mutex_lock(&queue->thread_mutex);
4578 	while (!p_atomic_read(&queue->thread_exit)) {
4579 		struct radv_deferred_queue_submission *submission = queue->thread_submission;
4580 		struct list_head processing_list;
4581 		VkResult result = VK_SUCCESS;
4582 		if (!submission) {
4583 			pthread_cond_wait(&queue->thread_cond, &queue->thread_mutex);
4584 			continue;
4585 		}
4586 		pthread_mutex_unlock(&queue->thread_mutex);
4587 
4588 		/* Wait at most 5 seconds so we have a chance to notice shutdown when
4589 		 * a semaphore never gets signaled. If it takes longer we just retry
4590 		 * the wait next iteration. */
4591 		result = wait_for_submission_timelines_available(submission,
4592 		                                                 radv_get_absolute_timeout(5000000000));
4593 		if (result != VK_SUCCESS) {
4594 			pthread_mutex_lock(&queue->thread_mutex);
4595 			continue;
4596 		}
4597 
4598 		/* The lock isn't held, but nobody can hand us a new submission
4599 		 * until we finish processing the current one. */
4600 		p_atomic_set(&queue->thread_submission, NULL);
4601 
4602 		list_inithead(&processing_list);
4603 		list_addtail(&submission->processing_list, &processing_list);
4604 		result = radv_process_submissions(&processing_list);
4605 
4606 		pthread_mutex_lock(&queue->thread_mutex);
4607 	}
4608 	pthread_mutex_unlock(&queue->thread_mutex);
4609 	return NULL;
4610 }
4611 
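/* Drop 'decrement' references from the submission's wait count. Once it hits
 * zero the submission is ready: if its timeline waits are already satisfied it
 * is queued for inline processing, otherwise it is handed off to the
 * submission thread. */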
4612 static VkResult
4613 radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
4614                               uint32_t decrement,
4615                               struct list_head *processing_list)
4616 {
4617 	struct radv_queue *queue = submission->queue;
4618 	int ret;
4619 	if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
4620 		return VK_SUCCESS;
4621 
4622 	if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) == VK_SUCCESS) {
4623 		list_addtail(&submission->processing_list, processing_list);
4624 		return VK_SUCCESS;
4625 	}
4626 
4627 	pthread_mutex_lock(&queue->thread_mutex);
4628 
4629 	/* A submission can only be ready for the thread if it doesn't have
4630 	 * any predecessors in the same queue, so there can only be one such
4631 	 * submission at a time. */
4632 	assert(queue->thread_submission == NULL);
4633 
4634 	/* Only start the thread on demand to save resources for the many games
4635 	 * which only use binary semaphores. */
4636 	if (!queue->thread_running) {
4637 		ret = pthread_create(&queue->submission_thread, NULL,
4638 		                      radv_queue_submission_thread_run, queue);
4639 		if (ret) {
4640 			pthread_mutex_unlock(&queue->thread_mutex);
4641 			return vk_errorf(queue->device->instance,
4642 			                 VK_ERROR_DEVICE_LOST,
4643 			                 "Failed to start submission thread");
4644 		}
4645 		queue->thread_running = true;
4646 	}
4647 
4648 	queue->thread_submission = submission;
4649 	pthread_mutex_unlock(&queue->thread_mutex);
4650 
4651 	pthread_cond_signal(&queue->thread_cond);
4652 	return VK_SUCCESS;
4653 }
4654 
4655 static VkResult radv_queue_submit(struct radv_queue *queue,
4656                                   const struct radv_queue_submission *submission)
4657 {
4658 	struct radv_deferred_queue_submission *deferred = NULL;
4659 
4660 	VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
4661 	if (result != VK_SUCCESS)
4662 		return result;
4663 
4664 	struct list_head processing_list;
4665 	list_inithead(&processing_list);
4666 
4667 	result = radv_queue_enqueue_submission(deferred, &processing_list);
4668 	if (result != VK_SUCCESS) {
4669 		/* If anything is in the list we leak. */
4670 		assert(list_is_empty(&processing_list));
4671 		return result;
4672 	}
4673 	return radv_process_submissions(&processing_list);
4674 }
4675 
4676 bool
4677 radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
4678 {
4679 	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4680 	struct radv_winsys_sem_info sem_info;
4681 	VkResult result;
4682 
4683 	result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, NULL,
4684 				     0, NULL, NULL, VK_NULL_HANDLE);
4685 	if (result != VK_SUCCESS)
4686 		return false;
4687 
4688 	result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
4689 					      NULL, NULL, &sem_info, NULL,
4690 					      false, NULL);
4691 	radv_free_sem_info(&sem_info);
4692 	if (result != VK_SUCCESS)
4693 		return false;
4694 
4695 	return true;
4697 }
4698 
4699 /* Signals fence as soon as all the work currently put on queue is done. */
4700 static VkResult radv_signal_fence(struct radv_queue *queue,
4701                                   VkFence fence)
4702 {
4703 	return radv_queue_submit(queue, &(struct radv_queue_submission) {
4704 			.fence = fence
4705 		});
4706 }
4707 
4708 static bool radv_submit_has_effects(const VkSubmitInfo *info)
4709 {
4710 	return info->commandBufferCount ||
4711 	       info->waitSemaphoreCount ||
4712 	       info->signalSemaphoreCount;
4713 }
4714 
4715 VkResult radv_QueueSubmit(
4716 	VkQueue                                     _queue,
4717 	uint32_t                                    submitCount,
4718 	const VkSubmitInfo*                         pSubmits,
4719 	VkFence                                     fence)
4720 {
4721 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
4722 	VkResult result;
4723 	uint32_t fence_idx = 0;
4724 	bool flushed_caches = false;
4725 
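	/* Attach the fence to the last submit that does any actual work, so
	 * it only signals once all of the work has completed. */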
4726 	if (fence != VK_NULL_HANDLE) {
4727 		for (uint32_t i = 0; i < submitCount; ++i)
4728 			if (radv_submit_has_effects(pSubmits + i))
4729 				fence_idx = i;
4730 	} else
4731 		fence_idx = UINT32_MAX;
4732 
4733 	for (uint32_t i = 0; i < submitCount; i++) {
4734 		if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
4735 			continue;
4736 
4737 		VkPipelineStageFlags wait_dst_stage_mask = 0;
4738 		for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
4739 			wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
4740 		}
4741 
4742 		const VkTimelineSemaphoreSubmitInfo *timeline_info =
4743 			vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
4744 
4745 		result = radv_queue_submit(queue, &(struct radv_queue_submission) {
4746 				.cmd_buffers = pSubmits[i].pCommandBuffers,
4747 				.cmd_buffer_count = pSubmits[i].commandBufferCount,
4748 				.wait_dst_stage_mask = wait_dst_stage_mask,
4749 				.flush_caches = !flushed_caches,
4750 				.wait_semaphores = pSubmits[i].pWaitSemaphores,
4751 				.wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
4752 				.signal_semaphores = pSubmits[i].pSignalSemaphores,
4753 				.signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
4754 				.fence = i == fence_idx ? fence : VK_NULL_HANDLE,
4755 				.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
4756 				.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
4757 				.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
4758 				.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
4759 			});
4760 		if (result != VK_SUCCESS)
4761 			return result;
4762 
4763 		flushed_caches = true;
4764 	}
4765 
4766 	if (fence != VK_NULL_HANDLE && !submitCount) {
4767 		result = radv_signal_fence(queue, fence);
4768 		if (result != VK_SUCCESS)
4769 			return result;
4770 	}
4771 
4772 	return VK_SUCCESS;
4773 }
4774 
4775 VkResult radv_QueueWaitIdle(
4776 	VkQueue                                     _queue)
4777 {
4778 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
4779 
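	/* First drain all deferred submissions for this queue, then wait for
	 * the hardware context itself to go idle. */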
4780 	pthread_mutex_lock(&queue->pending_mutex);
4781 	while (!list_is_empty(&queue->pending_submissions)) {
4782 		pthread_cond_wait(&queue->device->timeline_cond, &queue->pending_mutex);
4783 	}
4784 	pthread_mutex_unlock(&queue->pending_mutex);
4785 
4786 	if (!queue->device->ws->ctx_wait_idle(queue->hw_ctx,
4787 					      radv_queue_family_to_ring(queue->queue_family_index),
4788 					      queue->queue_idx))
4789 		return VK_ERROR_DEVICE_LOST;
4790 
4791 	return VK_SUCCESS;
4792 }
4793 
4794 VkResult radv_DeviceWaitIdle(
4795 	VkDevice                                    _device)
4796 {
4797 	RADV_FROM_HANDLE(radv_device, device, _device);
4798 
4799 	for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
4800 		for (unsigned q = 0; q < device->queue_count[i]; q++) {
4801 			VkResult result =
4802 				radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
4803 
4804 			if (result != VK_SUCCESS)
4805 				return result;
4806 		}
4807 	}
4808 	return VK_SUCCESS;
4809 }
4810 
4811 VkResult radv_EnumerateInstanceExtensionProperties(
4812     const char*                                 pLayerName,
4813     uint32_t*                                   pPropertyCount,
4814     VkExtensionProperties*                      pProperties)
4815 {
4816 	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
4817 
4818 	for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
4819 		if (radv_instance_extensions_supported.extensions[i]) {
4820 			vk_outarray_append(&out, prop) {
4821 				*prop = radv_instance_extensions[i];
4822 			}
4823 		}
4824 	}
4825 
4826 	return vk_outarray_status(&out);
4827 }
4828 
4829 VkResult radv_EnumerateDeviceExtensionProperties(
4830     VkPhysicalDevice                            physicalDevice,
4831     const char*                                 pLayerName,
4832     uint32_t*                                   pPropertyCount,
4833     VkExtensionProperties*                      pProperties)
4834 {
4835 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
4836 	VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
4837 
4838 	for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
4839 		if (device->supported_extensions.extensions[i]) {
4840 			vk_outarray_append(&out, prop) {
4841 				*prop = radv_device_extensions[i];
4842 			}
4843 		}
4844 	}
4845 
4846 	return vk_outarray_status(&out);
4847 }
4848 
4849 PFN_vkVoidFunction radv_GetInstanceProcAddr(
4850 	VkInstance                                  _instance,
4851 	const char*                                 pName)
4852 {
4853 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
4854 
4855 	/* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
4856 	 * when we have to return valid function pointers, NULL, or it's left
4857 	 * undefined.  See the table for exact details.
4858 	 */
4859 	if (pName == NULL)
4860 		return NULL;
4861 
4862 #define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
4863 	if (strcmp(pName, "vk" #entrypoint) == 0) \
4864 		return (PFN_vkVoidFunction)radv_##entrypoint
4865 
4866 	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
4867 	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
4868 	LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
4869 	LOOKUP_RADV_ENTRYPOINT(CreateInstance);
4870 
4871 	/* GetInstanceProcAddr() can also be called with a NULL instance.
4872 	 * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
4873 	 */
4874 	LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
4875 
4876 #undef LOOKUP_RADV_ENTRYPOINT
4877 
4878 	if (instance == NULL)
4879 		return NULL;
4880 
4881 	int idx = radv_get_instance_entrypoint_index(pName);
4882 	if (idx >= 0)
4883 		return instance->dispatch.entrypoints[idx];
4884 
4885 	idx = radv_get_physical_device_entrypoint_index(pName);
4886 	if (idx >= 0)
4887 		return instance->physical_device_dispatch.entrypoints[idx];
4888 
4889 	idx = radv_get_device_entrypoint_index(pName);
4890 	if (idx >= 0)
4891 		return instance->device_dispatch.entrypoints[idx];
4892 
4893 	return NULL;
4894 }
4895 
4896 /* The loader wants us to expose a second GetInstanceProcAddr function
4897  * to work around certain LD_PRELOAD issues seen in apps.
4898  */
4899 PUBLIC
4900 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
4901 	VkInstance                                  instance,
4902 	const char*                                 pName);
4903 
4904 PUBLIC
4905 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
4906 	VkInstance                                  instance,
4907 	const char*                                 pName)
4908 {
4909 	return radv_GetInstanceProcAddr(instance, pName);
4910 }
4911 
4912 PUBLIC
4913 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
4914 	VkInstance                                  _instance,
4915 	const char*                                 pName);
4916 
4917 PUBLIC
4918 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
4919 	VkInstance                                  _instance,
4920 	const char*                                 pName)
4921 {
4922 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
4923 
4924 	if (!pName || !instance)
4925 		return NULL;
4926 
4927 	int idx = radv_get_physical_device_entrypoint_index(pName);
4928 	if (idx < 0)
4929 		return NULL;
4930 
4931 	return instance->physical_device_dispatch.entrypoints[idx];
4932 }
4933 
4934 PFN_vkVoidFunction radv_GetDeviceProcAddr(
4935 	VkDevice                                    _device,
4936 	const char*                                 pName)
4937 {
4938 	RADV_FROM_HANDLE(radv_device, device, _device);
4939 
4940 	if (!device || !pName)
4941 		return NULL;
4942 
4943 	int idx = radv_get_device_entrypoint_index(pName);
4944 	if (idx < 0)
4945 		return NULL;
4946 
4947 	return device->dispatch.entrypoints[idx];
4948 }
4949 
4950 bool radv_get_memory_fd(struct radv_device *device,
4951 			struct radv_device_memory *memory,
4952 			int *pFD)
4953 {
4954 	struct radeon_bo_metadata metadata;
4955 
4956 	if (memory->image) {
		/* Zero-initialize so linear images don't pass uninitialized
		 * metadata to the winsys. */
		memset(&metadata, 0, sizeof(metadata));
4957 		if (memory->image->tiling != VK_IMAGE_TILING_LINEAR)
4958 			radv_init_metadata(device, memory->image, &metadata);
4959 		device->ws->buffer_set_metadata(memory->bo, &metadata);
4960 	}
4961 
4962 	return device->ws->buffer_get_fd(device->ws, memory->bo,
4963 					 pFD);
4964 }
4965 
4966 
4967 void
4968 radv_free_memory(struct radv_device *device,
4969 		 const VkAllocationCallbacks* pAllocator,
4970 		 struct radv_device_memory *mem)
4971 {
4972 	if (mem == NULL)
4973 		return;
4974 
4975 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
4976 	if (mem->android_hardware_buffer)
4977 		AHardwareBuffer_release(mem->android_hardware_buffer);
4978 #endif
4979 
4980 	if (mem->bo) {
4981 		if (device->overallocation_disallowed) {
4982 			mtx_lock(&device->overallocation_mutex);
4983 			device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
4984 			mtx_unlock(&device->overallocation_mutex);
4985 		}
4986 
4987 		radv_bo_list_remove(device, mem->bo);
4988 		device->ws->buffer_destroy(mem->bo);
4989 		mem->bo = NULL;
4990 	}
4991 
4992 	vk_object_base_finish(&mem->base);
4993 	vk_free2(&device->vk.alloc, pAllocator, mem);
4994 }
4995 
4996 static VkResult radv_alloc_memory(struct radv_device *device,
4997 				  const VkMemoryAllocateInfo*     pAllocateInfo,
4998 				  const VkAllocationCallbacks*    pAllocator,
4999 				  VkDeviceMemory*                 pMem)
5000 {
5001 	struct radv_device_memory *mem;
5002 	VkResult result;
5003 	enum radeon_bo_domain domain;
5004 	uint32_t flags = 0;
5005 
5006 	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
5007 
5008 	const VkImportMemoryFdInfoKHR *import_info =
5009 		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
5010 	const VkMemoryDedicatedAllocateInfo *dedicate_info =
5011 		vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
5012 	const VkExportMemoryAllocateInfo *export_info =
5013 		vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
5014 	const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
5015 		vk_find_struct_const(pAllocateInfo->pNext,
5016 		                     IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
5017 	const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
5018 		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
5019 
5020 	const struct wsi_memory_allocate_info *wsi_info =
5021 		vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
5022 
5023 	if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
5024 	    !(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
5025 		/* Some apps pass allocationSize == 0; tolerate it and return a null handle. */
5026 		*pMem = VK_NULL_HANDLE;
5027 		return VK_SUCCESS;
5028 	}
5029 
5030 	mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
5031 			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5032 	if (mem == NULL)
5033 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5034 
5035 	vk_object_base_init(&device->vk, &mem->base,
5036 			    VK_OBJECT_TYPE_DEVICE_MEMORY);
5037 
5038 	if (wsi_info && wsi_info->implicit_sync)
5039 		flags |= RADEON_FLAG_IMPLICIT_SYNC;
5040 
5041 	if (dedicate_info) {
5042 		mem->image = radv_image_from_handle(dedicate_info->image);
5043 		mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
5044 	} else {
5045 		mem->image = NULL;
5046 		mem->buffer = NULL;
5047 	}
5048 
5049 	float priority_float = 0.5;
5050 	const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
5051 		vk_find_struct_const(pAllocateInfo->pNext,
5052 				     MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
5053 	if (priority_ext)
5054 		priority_float = priority_ext->priority;
5055 
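	/* Map the application's [0.0, 1.0] priority onto the winsys'
	 * integer BO priority range. */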
5056 	unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
5057 	                         (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
5058 
5059 	mem->user_ptr = NULL;
5060 	mem->bo = NULL;
5061 
5062 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5063 	mem->android_hardware_buffer = NULL;
5064 #endif
5065 
5066 	if (ahb_import_info) {
5067 		result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
5068 		if (result != VK_SUCCESS)
5069 			goto fail;
5070 	} else if (export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
5071 		result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
5072 		if (result != VK_SUCCESS)
5073 			goto fail;
5074 	} else if (import_info) {
5075 		assert(import_info->handleType ==
5076 		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5077 		       import_info->handleType ==
5078 		       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5079 		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
5080 						     priority, NULL);
5081 		if (!mem->bo) {
5082 			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
5083 			goto fail;
5084 		} else {
5085 			close(import_info->fd);
5086 		}
5087 
5088 		if (mem->image && mem->image->plane_count == 1 &&
5089 		    !vk_format_is_depth_or_stencil(mem->image->vk_format)) {
5090 			struct radeon_bo_metadata metadata;
5091 			device->ws->buffer_get_metadata(mem->bo, &metadata);
5092 
5093 			struct radv_image_create_info create_info = {
5094 				.no_metadata_planes = true,
5095 				.bo_metadata = &metadata
5096 			};
5097 
5098 			/* This gives a basic ability to import radeonsi images
5099 			 * that don't have DCC. This is not guaranteed by any
5100 			 * spec and can be removed after we support modifiers. */
5101 			result = radv_image_create_layout(device, create_info, mem->image);
5102 			if (result != VK_SUCCESS) {
5103 				device->ws->buffer_destroy(mem->bo);
5104 				goto fail;
5105 			}
5106 		}
5107 	} else if (host_ptr_info) {
5108 		assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
5109 		mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
5110 		                                      pAllocateInfo->allocationSize,
5111 		                                      priority);
5112 		if (!mem->bo) {
5113 			result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
5114 			goto fail;
5115 		} else {
5116 			mem->user_ptr = host_ptr_info->pHostPointer;
5117 		}
5118 	} else {
5119 		uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
5120 		uint32_t heap_index;
5121 
5122 		heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
5123 		domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
5124 		flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
5125 
5126 		if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
5127 			flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
5128 			if (device->use_global_bo_list) {
5129 				flags |= RADEON_FLAG_PREFER_LOCAL_BO;
5130 			}
5131 		}
5132 
5133 		if (device->overallocation_disallowed) {
5134 			uint64_t total_size =
5135 				device->physical_device->memory_properties.memoryHeaps[heap_index].size;
5136 
5137 			mtx_lock(&device->overallocation_mutex);
5138 			if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
5139 				mtx_unlock(&device->overallocation_mutex);
5140 				result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5141 				goto fail;
5142 			}
5143 			device->allocated_memory_size[heap_index] += alloc_size;
5144 			mtx_unlock(&device->overallocation_mutex);
5145 		}
5146 
5147 		mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
5148 		                                    domain, flags, priority);
5149 
5150 		if (!mem->bo) {
5151 			if (device->overallocation_disallowed) {
5152 				mtx_lock(&device->overallocation_mutex);
5153 				device->allocated_memory_size[heap_index] -= alloc_size;
5154 				mtx_unlock(&device->overallocation_mutex);
5155 			}
5156 			result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5157 			goto fail;
5158 		}
5159 
5160 		mem->heap_index = heap_index;
5161 		mem->alloc_size = alloc_size;
5162 	}
5163 
5164 	if (!wsi_info) {
5165 		result = radv_bo_list_add(device, mem->bo);
5166 		if (result != VK_SUCCESS)
5167 			goto fail;
5168 	}
5169 
5170 	*pMem = radv_device_memory_to_handle(mem);
5171 
5172 	return VK_SUCCESS;
5173 
5174 fail:
5175 	radv_free_memory(device, pAllocator, mem);
5176 
5177 	return result;
5178 }
5179 
5180 VkResult radv_AllocateMemory(
5181 	VkDevice                                    _device,
5182 	const VkMemoryAllocateInfo*                 pAllocateInfo,
5183 	const VkAllocationCallbacks*                pAllocator,
5184 	VkDeviceMemory*                             pMem)
5185 {
5186 	RADV_FROM_HANDLE(radv_device, device, _device);
5187 	return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
5188 }
5189 
5190 void radv_FreeMemory(
5191 	VkDevice                                    _device,
5192 	VkDeviceMemory                              _mem,
5193 	const VkAllocationCallbacks*                pAllocator)
5194 {
5195 	RADV_FROM_HANDLE(radv_device, device, _device);
5196 	RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
5197 
5198 	radv_free_memory(device, pAllocator, mem);
5199 }
5200 
5201 VkResult radv_MapMemory(
5202 	VkDevice                                    _device,
5203 	VkDeviceMemory                              _memory,
5204 	VkDeviceSize                                offset,
5205 	VkDeviceSize                                size,
5206 	VkMemoryMapFlags                            flags,
5207 	void**                                      ppData)
5208 {
5209 	RADV_FROM_HANDLE(radv_device, device, _device);
5210 	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5211 
5212 	if (mem == NULL) {
5213 		*ppData = NULL;
5214 		return VK_SUCCESS;
5215 	}
5216 
5217 	if (mem->user_ptr)
5218 		*ppData = mem->user_ptr;
5219 	else
5220 		*ppData = device->ws->buffer_map(mem->bo);
5221 
5222 	if (*ppData) {
5223 		*ppData += offset;
5224 		return VK_SUCCESS;
5225 	}
5226 
5227 	return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
5228 }
5229 
5230 void radv_UnmapMemory(
5231 	VkDevice                                    _device,
5232 	VkDeviceMemory                              _memory)
5233 {
5234 	RADV_FROM_HANDLE(radv_device, device, _device);
5235 	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5236 
5237 	if (mem == NULL)
5238 		return;
5239 
5240 	if (mem->user_ptr == NULL)
5241 		device->ws->buffer_unmap(mem->bo);
5242 }
5243 
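/* All host-visible memory types RADV exposes are also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops. */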
5244 VkResult radv_FlushMappedMemoryRanges(
5245 	VkDevice                                    _device,
5246 	uint32_t                                    memoryRangeCount,
5247 	const VkMappedMemoryRange*                  pMemoryRanges)
5248 {
5249 	return VK_SUCCESS;
5250 }
5251 
5252 VkResult radv_InvalidateMappedMemoryRanges(
5253 	VkDevice                                    _device,
5254 	uint32_t                                    memoryRangeCount,
5255 	const VkMappedMemoryRange*                  pMemoryRanges)
5256 {
5257 	return VK_SUCCESS;
5258 }
5259 
5260 void radv_GetBufferMemoryRequirements(
5261 	VkDevice                                    _device,
5262 	VkBuffer                                    _buffer,
5263 	VkMemoryRequirements*                       pMemoryRequirements)
5264 {
5265 	RADV_FROM_HANDLE(radv_device, device, _device);
5266 	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
5267 
5268 	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5269 
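	/* Sparse buffers are bound at page granularity, hence the 4KiB
	 * alignment; everything else only needs 16 bytes. */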
5270 	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
5271 		pMemoryRequirements->alignment = 4096;
5272 	else
5273 		pMemoryRequirements->alignment = 16;
5274 
5275 	pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
5276 }
5277 
5278 void radv_GetBufferMemoryRequirements2(
5279 	VkDevice                                     device,
5280 	const VkBufferMemoryRequirementsInfo2       *pInfo,
5281 	VkMemoryRequirements2                       *pMemoryRequirements)
5282 {
5283 	radv_GetBufferMemoryRequirements(device, pInfo->buffer,
5284                                         &pMemoryRequirements->memoryRequirements);
5285 	vk_foreach_struct(ext, pMemoryRequirements->pNext) {
5286 		switch (ext->sType) {
5287 		case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5288 			VkMemoryDedicatedRequirements *req =
5289 			               (VkMemoryDedicatedRequirements *) ext;
5290 			req->requiresDedicatedAllocation = false;
5291 			req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5292 			break;
5293 		}
5294 		default:
5295 			break;
5296 		}
5297 	}
5298 }
5299 
5300 void radv_GetImageMemoryRequirements(
5301 	VkDevice                                    _device,
5302 	VkImage                                     _image,
5303 	VkMemoryRequirements*                       pMemoryRequirements)
5304 {
5305 	RADV_FROM_HANDLE(radv_device, device, _device);
5306 	RADV_FROM_HANDLE(radv_image, image, _image);
5307 
5308 	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5309 
5310 	pMemoryRequirements->size = image->size;
5311 	pMemoryRequirements->alignment = image->alignment;
5312 }
5313 
5314 void radv_GetImageMemoryRequirements2(
5315 	VkDevice                                    device,
5316 	const VkImageMemoryRequirementsInfo2       *pInfo,
5317 	VkMemoryRequirements2                      *pMemoryRequirements)
5318 {
5319 	radv_GetImageMemoryRequirements(device, pInfo->image,
5320                                         &pMemoryRequirements->memoryRequirements);
5321 
5322 	RADV_FROM_HANDLE(radv_image, image, pInfo->image);
5323 
5324 	vk_foreach_struct(ext, pMemoryRequirements->pNext) {
5325 		switch (ext->sType) {
5326 		case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5327 			VkMemoryDedicatedRequirements *req =
5328 			               (VkMemoryDedicatedRequirements *) ext;
5329 			req->requiresDedicatedAllocation = image->shareable &&
5330 			                                   image->tiling != VK_IMAGE_TILING_LINEAR;
5331 			req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5332 			break;
5333 		}
5334 		default:
5335 			break;
5336 		}
5337 	}
5338 }
5339 
5340 void radv_GetImageSparseMemoryRequirements(
5341 	VkDevice                                    device,
5342 	VkImage                                     image,
5343 	uint32_t*                                   pSparseMemoryRequirementCount,
5344 	VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
5345 {
5346 	stub();
5347 }
5348 
5349 void radv_GetImageSparseMemoryRequirements2(
5350 	VkDevice                                    device,
5351 	const VkImageSparseMemoryRequirementsInfo2 *pInfo,
5352 	uint32_t*                                   pSparseMemoryRequirementCount,
5353 	VkSparseImageMemoryRequirements2           *pSparseMemoryRequirements)
5354 {
5355 	stub();
5356 }
5357 
5358 void radv_GetDeviceMemoryCommitment(
5359 	VkDevice                                    device,
5360 	VkDeviceMemory                              memory,
5361 	VkDeviceSize*                               pCommittedMemoryInBytes)
5362 {
5363 	*pCommittedMemoryInBytes = 0;
5364 }
5365 
5366 VkResult radv_BindBufferMemory2(VkDevice device,
5367                                 uint32_t bindInfoCount,
5368                                 const VkBindBufferMemoryInfo *pBindInfos)
5369 {
5370 	for (uint32_t i = 0; i < bindInfoCount; ++i) {
5371 		RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5372 		RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5373 
5374 		if (mem) {
5375 			buffer->bo = mem->bo;
5376 			buffer->offset = pBindInfos[i].memoryOffset;
5377 		} else {
5378 			buffer->bo = NULL;
5379 		}
5380 	}
5381 	return VK_SUCCESS;
5382 }
5383 
5384 VkResult radv_BindBufferMemory(
5385 	VkDevice                                    device,
5386 	VkBuffer                                    buffer,
5387 	VkDeviceMemory                              memory,
5388 	VkDeviceSize                                memoryOffset)
5389 {
5390 	const VkBindBufferMemoryInfo info = {
5391 		.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5392 		.buffer = buffer,
5393 		.memory = memory,
5394 		.memoryOffset = memoryOffset
5395 	};
5396 
5397 	return radv_BindBufferMemory2(device, 1, &info);
5398 }
5399 
5400 VkResult radv_BindImageMemory2(VkDevice device,
5401                                uint32_t bindInfoCount,
5402                                const VkBindImageMemoryInfo *pBindInfos)
5403 {
5404 	for (uint32_t i = 0; i < bindInfoCount; ++i) {
5405 		RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5406 		RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5407 
5408 		if (mem) {
5409 			image->bo = mem->bo;
5410 			image->offset = pBindInfos[i].memoryOffset;
5411 		} else {
5412 			image->bo = NULL;
5413 			image->offset = 0;
5414 		}
5415 	}
5416 	return VK_SUCCESS;
5417 }
5418 
5419 
5420 VkResult radv_BindImageMemory(
5421 	VkDevice                                    device,
5422 	VkImage                                     image,
5423 	VkDeviceMemory                              memory,
5424 	VkDeviceSize                                memoryOffset)
5425 {
5426 	const VkBindImageMemoryInfo info = {
5427 		.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
5428 		.image = image,
5429 		.memory = memory,
5430 		.memoryOffset = memoryOffset
5431 	};
5432 
5433 	return radv_BindImageMemory2(device, 1, &info);
5434 }
5435 
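/*
 * Sparse binding: each VkBindSparseInfo that actually does something
 * (binds memory or touches a semaphore) is turned into one queue
 * submission below. The fence, if any, is attached to the last such
 * entry; "empty" entries are skipped unless they are the one carrying
 * the fence.
 */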
5436 static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
5437 {
5438 	return info->bufferBindCount ||
5439 	       info->imageOpaqueBindCount ||
5440 	       info->imageBindCount ||
5441 	       info->waitSemaphoreCount ||
5442 	       info->signalSemaphoreCount;
5443 }
5444 
5445 VkResult radv_QueueBindSparse(
5446 	VkQueue                                     _queue,
5447 	uint32_t                                    bindInfoCount,
5448 	const VkBindSparseInfo*                     pBindInfo,
5449 	VkFence                                     fence)
5450 {
5451 	RADV_FROM_HANDLE(radv_queue, queue, _queue);
5452 	VkResult result;
5453 	uint32_t fence_idx = 0;
5454 
5455 	if (fence != VK_NULL_HANDLE) {
5456 		for (uint32_t i = 0; i < bindInfoCount; ++i)
5457 			if (radv_sparse_bind_has_effects(pBindInfo + i))
5458 				fence_idx = i;
5459 	} else
5460 		fence_idx = UINT32_MAX;
5461 
5462 	for (uint32_t i = 0; i < bindInfoCount; ++i) {
5463 		if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
5464 			continue;
5465 
5466 		const VkTimelineSemaphoreSubmitInfo *timeline_info =
5467 			vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
5468 
5469 		VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
5470 				.buffer_binds = pBindInfo[i].pBufferBinds,
5471 				.buffer_bind_count = pBindInfo[i].bufferBindCount,
5472 				.image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
5473 				.image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
5474 				.wait_semaphores = pBindInfo[i].pWaitSemaphores,
5475 				.wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
5476 				.signal_semaphores = pBindInfo[i].pSignalSemaphores,
5477 				.signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
5478 				.fence = i == fence_idx ? fence : VK_NULL_HANDLE,
5479 				.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
5480 				.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
5481 				.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
5482 				.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
5483 			});
5484 
5485 		if (result != VK_SUCCESS)
5486 			return result;
5487 	}
5488 
5489 	if (fence != VK_NULL_HANDLE && !bindInfoCount) {
5490 		result = radv_signal_fence(queue, fence);
5491 		if (result != VK_SUCCESS)
5492 			return result;
5493 	}
5494 
5495 	return VK_SUCCESS;
5496 }
5497 
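/*
 * Fences are made of two "parts" to implement external fence semantics:
 * a permanent payload and an optional temporary payload that takes
 * precedence until it is consumed (see VK_KHR_external_fence). A part is
 * one of: nothing, a winsys fence, a DRM syncobj, or a WSI fence.
 */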
5498 static void
5499 radv_destroy_fence_part(struct radv_device *device,
5500 			struct radv_fence_part *part)
5501 {
5502 	switch (part->kind) {
5503 	case RADV_FENCE_NONE:
5504 		break;
5505 	case RADV_FENCE_WINSYS:
5506 		device->ws->destroy_fence(part->fence);
5507 		break;
5508 	case RADV_FENCE_SYNCOBJ:
5509 		device->ws->destroy_syncobj(device->ws, part->syncobj);
5510 		break;
5511 	case RADV_FENCE_WSI:
5512 		part->fence_wsi->destroy(part->fence_wsi);
5513 		break;
5514 	default:
5515 		unreachable("Invalid fence type");
5516 	}
5517 
5518 	part->kind = RADV_FENCE_NONE;
5519 }
5520 
5521 static void
5522 radv_destroy_fence(struct radv_device *device,
5523 		   const VkAllocationCallbacks *pAllocator,
5524 		   struct radv_fence *fence)
5525 {
5526 	radv_destroy_fence_part(device, &fence->temporary);
5527 	radv_destroy_fence_part(device, &fence->permanent);
5528 
5529 	vk_object_base_finish(&fence->base);
5530 	vk_free2(&device->vk.alloc, pAllocator, fence);
5531 }
5532 
5533 VkResult radv_CreateFence(
5534 	VkDevice                                    _device,
5535 	const VkFenceCreateInfo*                    pCreateInfo,
5536 	const VkAllocationCallbacks*                pAllocator,
5537 	VkFence*                                    pFence)
5538 {
5539 	RADV_FROM_HANDLE(radv_device, device, _device);
5540 	const VkExportFenceCreateInfo *export =
5541 		vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
5542 	VkExternalFenceHandleTypeFlags handleTypes =
5543 		export ? export->handleTypes : 0;
5544 	struct radv_fence *fence;
5545 
5546 	fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
5547 			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5548 	if (!fence)
5549 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5550 
5551 	vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
5552 
5553 	if (device->always_use_syncobj || handleTypes) {
5554 		fence->permanent.kind = RADV_FENCE_SYNCOBJ;
5555 
5556 		bool create_signaled = false;
5557 		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
5558 			create_signaled = true;
5559 
5560 		int ret = device->ws->create_syncobj(device->ws, create_signaled,
5561 						     &fence->permanent.syncobj);
5562 		if (ret) {
5563 			radv_destroy_fence(device, pAllocator, fence);
5564 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5565 		}
5566 	} else {
5567 		fence->permanent.kind = RADV_FENCE_WINSYS;
5568 
5569 		fence->permanent.fence = device->ws->create_fence();
5570 		if (!fence->permanent.fence) {
5572 			radv_destroy_fence(device, pAllocator, fence);
5573 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5574 		}
5575 		if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
5576 			device->ws->signal_fence(fence->permanent.fence);
5577 	}
5578 
5579 	*pFence = radv_fence_to_handle(fence);
5580 
5581 	return VK_SUCCESS;
5582 }
5583 
5584 
5585 void radv_DestroyFence(
5586 	VkDevice                                    _device,
5587 	VkFence                                     _fence,
5588 	const VkAllocationCallbacks*                pAllocator)
5589 {
5590 	RADV_FROM_HANDLE(radv_device, device, _device);
5591 	RADV_FROM_HANDLE(radv_fence, fence, _fence);
5592 
5593 	if (!fence)
5594 		return;
5595 
5596 	radv_destroy_fence(device, pAllocator, fence);
5597 }
5598 
5599 static bool radv_all_fences_plain_and_submitted(struct radv_device *device,
5600                                                 uint32_t fenceCount, const VkFence *pFences)
5601 {
5602 	for (uint32_t i = 0; i < fenceCount; ++i) {
5603 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5604 
5605 		struct radv_fence_part *part =
5606 			fence->temporary.kind != RADV_FENCE_NONE ?
5607 			&fence->temporary : &fence->permanent;
5608 		if (part->kind != RADV_FENCE_WINSYS ||
5609 		    !device->ws->is_fence_waitable(part->fence))
5610 			return false;
5611 	}
5612 	return true;
5613 }
5614 
5615 static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
5616 {
5617 	for (uint32_t i = 0; i < fenceCount; ++i) {
5618 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5619 
5620 		struct radv_fence_part *part =
5621 			fence->temporary.kind != RADV_FENCE_NONE ?
5622 			&fence->temporary : &fence->permanent;
5623 		if (part->kind != RADV_FENCE_SYNCOBJ)
5624 			return false;
5625 	}
5626 	return true;
5627 }
5628 
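/*
 * Waiting strategy, from cheapest to most generic:
 *  1. If everything is a syncobj, batch the wait into a single
 *     wait_syncobj() call.
 *  2. For "wait any" on plain winsys fences (needs drm minor >= 10),
 *     collect them and use the winsys' multi-fence wait.
 *  3. Otherwise fall back to polling radv_GetFenceStatus() until the
 *     absolute timeout expires, or, in the wait-all case, to waiting on
 *     each fence part in turn.
 */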
5629 VkResult radv_WaitForFences(
5630 	VkDevice                                    _device,
5631 	uint32_t                                    fenceCount,
5632 	const VkFence*                              pFences,
5633 	VkBool32                                    waitAll,
5634 	uint64_t                                    timeout)
5635 {
5636 	RADV_FROM_HANDLE(radv_device, device, _device);
5637 	timeout = radv_get_absolute_timeout(timeout);
5638 
5639 	if (device->always_use_syncobj &&
5640 	    radv_all_fences_syncobj(fenceCount, pFences))
5641 	{
5642 		uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
5643 		if (!handles)
5644 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5645 
5646 		for (uint32_t i = 0; i < fenceCount; ++i) {
5647 			RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5648 
5649 			struct radv_fence_part *part =
5650 				fence->temporary.kind != RADV_FENCE_NONE ?
5651 				&fence->temporary : &fence->permanent;
5652 
5653 			assert(part->kind == RADV_FENCE_SYNCOBJ);
5654 			handles[i] = part->syncobj;
5655 		}
5656 
5657 		bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
5658 
5659 		free(handles);
5660 		return success ? VK_SUCCESS : VK_TIMEOUT;
5661 	}
5662 
5663 	if (!waitAll && fenceCount > 1) {
5664 		/* Not doing this by default for waitAll, due to needing to allocate twice. */
5665 		if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(device, fenceCount, pFences)) {
5666 			uint32_t wait_count = 0;
5667 			struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
5668 			if (!fences)
5669 				return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
5670 
5671 			for (uint32_t i = 0; i < fenceCount; ++i) {
5672 				RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5673 
5674 				struct radv_fence_part *part =
5675 					fence->temporary.kind != RADV_FENCE_NONE ?
5676 					&fence->temporary : &fence->permanent;
5677 				assert(part->kind == RADV_FENCE_WINSYS);
5678 
5679 				if (device->ws->fence_wait(device->ws, part->fence, false, 0)) {
5680 					free(fences);
5681 					return VK_SUCCESS;
5682 				}
5683 
5684 				fences[wait_count++] = part->fence;
5685 			}
5686 
5687 			bool success = device->ws->fences_wait(device->ws, fences, wait_count,
5688 							       waitAll, timeout - radv_get_current_time());
5689 
5690 			free(fences);
5691 			return success ? VK_SUCCESS : VK_TIMEOUT;
5692 		}
5693 
5694 		while(radv_get_current_time() <= timeout) {
5695 			for (uint32_t i = 0; i < fenceCount; ++i) {
5696 				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
5697 					return VK_SUCCESS;
5698 			}
5699 		}
5700 		return VK_TIMEOUT;
5701 	}
5702 
5703 	for (uint32_t i = 0; i < fenceCount; ++i) {
5704 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5705 		bool expired = false;
5706 
5707 		struct radv_fence_part *part =
5708 			fence->temporary.kind != RADV_FENCE_NONE ?
5709 			&fence->temporary : &fence->permanent;
5710 
5711 		switch (part->kind) {
5712 		case RADV_FENCE_NONE:
5713 			break;
5714 		case RADV_FENCE_WINSYS:
5715 			if (!device->ws->is_fence_waitable(part->fence)) {
5716 				while (!device->ws->is_fence_waitable(part->fence) &&
5717 				      radv_get_current_time() <= timeout)
5718 					/* Do nothing */;
5719 			}
5720 
5721 			expired = device->ws->fence_wait(device->ws,
5722 							 part->fence,
5723 							 true, timeout);
5724 			if (!expired)
5725 				return VK_TIMEOUT;
5726 			break;
5727 		case RADV_FENCE_SYNCOBJ:
5728 			if (!device->ws->wait_syncobj(device->ws,
5729 						      &part->syncobj, 1, true,
5730 						      timeout))
5731 				return VK_TIMEOUT;
5732 			break;
5733 		case RADV_FENCE_WSI: {
5734 			VkResult result = part->fence_wsi->wait(part->fence_wsi, timeout);
5735 			if (result != VK_SUCCESS)
5736 				return result;
5737 			break;
5738 		}
5739 		default:
5740 			unreachable("Invalid fence type");
5741 		}
5742 	}
5743 
5744 	return VK_SUCCESS;
5745 }
5746 
5747 VkResult radv_ResetFences(VkDevice _device,
5748 			  uint32_t fenceCount,
5749 			  const VkFence *pFences)
5750 {
5751 	RADV_FROM_HANDLE(radv_device, device, _device);
5752 
5753 	for (unsigned i = 0; i < fenceCount; ++i) {
5754 		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5755 
5756 		/* From the Vulkan 1.0.53 spec:
5757 		 *
5758 		 *    "If any member of pFences currently has its payload
5759 		 *    imported with temporary permanence, that fence’s prior
5760 		 *    permanent payload is first restored. The remaining
5761 		 *    operations described therefore operate on the restored
5762 		 *    payload."
5763 		 */
5764 		if (fence->temporary.kind != RADV_FENCE_NONE)
5765 			radv_destroy_fence_part(device, &fence->temporary);
5766 
5767 		struct radv_fence_part *part = &fence->permanent;
5768 
5769 		switch (part->kind) {
5770 		case RADV_FENCE_WINSYS:
5771 			device->ws->reset_fence(part->fence);
5772 			break;
5773 		case RADV_FENCE_SYNCOBJ:
5774 			device->ws->reset_syncobj(device->ws, part->syncobj);
5775 			break;
5776 		default:
5777 			unreachable("Invalid fence type");
5778 		}
5779 	}
5780 
5781 	return VK_SUCCESS;
5782 }
5783 
5784 VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
5785 {
5786 	RADV_FROM_HANDLE(radv_device, device, _device);
5787 	RADV_FROM_HANDLE(radv_fence, fence, _fence);
5788 
5789 	struct radv_fence_part *part =
5790 		fence->temporary.kind != RADV_FENCE_NONE ?
5791 		&fence->temporary : &fence->permanent;
5792 
5793 	switch (part->kind) {
5794 	case RADV_FENCE_NONE:
5795 		break;
5796 	case RADV_FENCE_WINSYS:
5797 		if (!device->ws->fence_wait(device->ws, part->fence, false, 0))
5798 			return VK_NOT_READY;
5799 		break;
5800 	case RADV_FENCE_SYNCOBJ: {
5801 		bool success = device->ws->wait_syncobj(device->ws,
5802 							&part->syncobj, 1, true, 0);
5803 		if (!success)
5804 			return VK_NOT_READY;
5805 		break;
5806 	}
5807 	case RADV_FENCE_WSI: {
5808 		VkResult result = part->fence_wsi->wait(part->fence_wsi, 0);
5809 		if (result != VK_SUCCESS) {
5810 			if (result == VK_TIMEOUT)
5811 				return VK_NOT_READY;
5812 			return result;
5813 		}
5814 		break;
5815 	}
5816 	default:
5817 		unreachable("Invalid fence type");
5818 	}
5819 
5820 	return VK_SUCCESS;
5821 }
5822 
5823 
5824 // Queue semaphore functions
5825 
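/*
 * Timeline semaphores are emulated on kernels without timeline syncobj
 * support: a radv_timeline is an ordered list of points (one binary
 * syncobj per signaled value), plus the highest value ever submitted and
 * ever signaled. Retired points are recycled via free_points, and CPU
 * waiters that raced ahead of submission park themselves on the waiters
 * list until radv_timeline_trigger_waiters_locked() releases them. All
 * of this is protected by timeline->mutex; the _locked helpers below
 * assume it is held.
 */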
5826 static void
5827 radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
5828 {
5829 	timeline->highest_signaled = value;
5830 	timeline->highest_submitted = value;
5831 	list_inithead(&timeline->points);
5832 	list_inithead(&timeline->free_points);
5833 	list_inithead(&timeline->waiters);
5834 	pthread_mutex_init(&timeline->mutex, NULL);
5835 }
5836 
5837 static void
5838 radv_destroy_timeline(struct radv_device *device,
5839                       struct radv_timeline *timeline)
5840 {
5841 	list_for_each_entry_safe(struct radv_timeline_point, point,
5842 	                         &timeline->free_points, list) {
5843 		list_del(&point->list);
5844 		device->ws->destroy_syncobj(device->ws, point->syncobj);
5845 		free(point);
5846 	}
5847 	list_for_each_entry_safe(struct radv_timeline_point, point,
5848 	                         &timeline->points, list) {
5849 		list_del(&point->list);
5850 		device->ws->destroy_syncobj(device->ws, point->syncobj);
5851 		free(point);
5852 	}
5853 	pthread_mutex_destroy(&timeline->mutex);
5854 }
5855 
5856 static void
5857 radv_timeline_gc_locked(struct radv_device *device,
5858                         struct radv_timeline *timeline)
5859 {
5860 	list_for_each_entry_safe(struct radv_timeline_point, point,
5861 	                         &timeline->points, list) {
5862 		if (point->wait_count || point->value > timeline->highest_submitted)
5863 			return;
5864 
5865 		if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
5866 			timeline->highest_signaled = point->value;
5867 			list_del(&point->list);
5868 			list_add(&point->list, &timeline->free_points);
5869 		}
5870 	}
5871 }
5872 
5873 static struct radv_timeline_point *
5874 radv_timeline_find_point_at_least_locked(struct radv_device *device,
5875                                          struct radv_timeline *timeline,
5876                                          uint64_t p)
5877 {
5878 	radv_timeline_gc_locked(device, timeline);
5879 
5880 	if (p <= timeline->highest_signaled)
5881 		return NULL;
5882 
5883 	list_for_each_entry(struct radv_timeline_point, point,
5884 	                    &timeline->points, list) {
5885 		if (point->value >= p) {
5886 			++point->wait_count;
5887 			return point;
5888 		}
5889 	}
5890 	return NULL;
5891 }
5892 
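/*
 * Insert a new point for value p, keeping the points list sorted by
 * value. Returns NULL if p is already signaled or already has a point;
 * otherwise the returned point has wait_count == 1, i.e. the caller owns
 * one reference.
 */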
5893 static struct radv_timeline_point *
5894 radv_timeline_add_point_locked(struct radv_device *device,
5895                                struct radv_timeline *timeline,
5896                                uint64_t p)
5897 {
5898 	radv_timeline_gc_locked(device, timeline);
5899 
5900 	struct radv_timeline_point *ret = NULL;
5901 	struct radv_timeline_point *prev = NULL;
5902 	int r;
5903 
5904 	if (p <= timeline->highest_signaled)
5905 		return NULL;
5906 
5907 	list_for_each_entry(struct radv_timeline_point, point,
5908 	                    &timeline->points, list) {
5909 		if (point->value == p) {
5910 			return NULL;
5911 		}
5912 
5913 		if (point->value < p)
5914 			prev = point;
5915 	}
5916 
5917 	if (list_is_empty(&timeline->free_points)) {
5918 		ret = malloc(sizeof(struct radv_timeline_point));
		if (!ret)
			return NULL;
5919 		r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
5920 		if (r) {
5921 			free(ret);
5922 			return NULL;
5923 		}
5924 	} else {
5925 		ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
5926 		list_del(&ret->list);
5927 
5928 		device->ws->reset_syncobj(device->ws, ret->syncobj);
5929 	}
5930 
5931 	ret->value = p;
5932 	ret->wait_count = 1;
5933 
5934 	if (prev) {
5935 		list_add(&ret->list, &prev->list);
5936 	} else {
5937 		list_addtail(&ret->list, &timeline->points);
5938 	}
5939 	return ret;
5940 }
5941 
5942 
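/*
 * Waiting on an emulated timeline is two-phase: first wait (on the
 * device-wide condition variable) until something >= value has been
 * submitted, then wait on the syncobj of the first point at or above
 * that value. If no such point exists, the value already retired and the
 * wait succeeds immediately.
 */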
5943 static VkResult
5944 radv_timeline_wait(struct radv_device *device,
5945                    struct radv_timeline *timeline,
5946                    uint64_t value,
5947                    uint64_t abs_timeout)
5948 {
5949 	pthread_mutex_lock(&timeline->mutex);
5950 
5951 	while(timeline->highest_submitted < value) {
5952 		struct timespec abstime;
5953 		timespec_from_nsec(&abstime, abs_timeout);
5954 
5955 		pthread_cond_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
5956 
5957 		if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
5958 			pthread_mutex_unlock(&timeline->mutex);
5959 			return VK_TIMEOUT;
5960 		}
5961 	}
5962 
5963 	struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(device, timeline, value);
5964 	pthread_mutex_unlock(&timeline->mutex);
5965 	if (!point)
5966 		return VK_SUCCESS;
5967 
5968 	bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
5969 
5970 	pthread_mutex_lock(&timeline->mutex);
5971 	point->wait_count--;
5972 	pthread_mutex_unlock(&timeline->mutex);
5973 	return success ? VK_SUCCESS : VK_TIMEOUT;
5974 }
5975 
5976 static void
5977 radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
5978                                      struct list_head *processing_list)
5979 {
5980 	list_for_each_entry_safe(struct radv_timeline_waiter, waiter,
5981 	                         &timeline->waiters, list) {
5982 		if (waiter->value > timeline->highest_submitted)
5983 			continue;
5984 
5985 		radv_queue_trigger_submission(waiter->submission, 1, processing_list);
5986 		list_del(&waiter->list);
5987 	}
5988 }
5989 
5990 static
5991 void radv_destroy_semaphore_part(struct radv_device *device,
5992                                  struct radv_semaphore_part *part)
5993 {
5994 	switch(part->kind) {
5995 	case RADV_SEMAPHORE_NONE:
5996 		break;
5997 	case RADV_SEMAPHORE_WINSYS:
5998 		device->ws->destroy_sem(part->ws_sem);
5999 		break;
6000 	case RADV_SEMAPHORE_TIMELINE:
6001 		radv_destroy_timeline(device, &part->timeline);
6002 		break;
6003 	case RADV_SEMAPHORE_SYNCOBJ:
6004 	case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
6005 		device->ws->destroy_syncobj(device->ws, part->syncobj);
6006 		break;
6007 	}
6008 	part->kind = RADV_SEMAPHORE_NONE;
6009 }
6010 
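/*
 * The semaphore type is picked from a VkSemaphoreTypeCreateInfo chained
 * into VkSemaphoreCreateInfo::pNext; without one, the semaphore is
 * binary. App-side sketch (illustrative only):
 *
 *     VkSemaphoreTypeCreateInfo type_info = {
 *         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
 *         .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
 *         .initialValue = 0,
 *     };
 *     VkSemaphoreCreateInfo info = {
 *         .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *         .pNext = &type_info,
 *     };
 *     vkCreateSemaphore(device, &info, NULL, &sem);
 */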
6011 static VkSemaphoreTypeKHR
6012 radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
6013 {
6014 	const VkSemaphoreTypeCreateInfo *type_info =
6015 		vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
6016 
6017 	if (!type_info)
6018 		return VK_SEMAPHORE_TYPE_BINARY;
6019 
6020 	if (initial_value)
6021 		*initial_value = type_info->initialValue;
6022 	return type_info->semaphoreType;
6023 }
6024 
6025 static void
6026 radv_destroy_semaphore(struct radv_device *device,
6027 		       const VkAllocationCallbacks *pAllocator,
6028 		       struct radv_semaphore *sem)
6029 {
6030 	radv_destroy_semaphore_part(device, &sem->temporary);
6031 	radv_destroy_semaphore_part(device, &sem->permanent);
6032 	vk_object_base_finish(&sem->base);
6033 	vk_free2(&device->vk.alloc, pAllocator, sem);
6034 }
6035 
6036 VkResult radv_CreateSemaphore(
6037 	VkDevice                                    _device,
6038 	const VkSemaphoreCreateInfo*                pCreateInfo,
6039 	const VkAllocationCallbacks*                pAllocator,
6040 	VkSemaphore*                                pSemaphore)
6041 {
6042 	RADV_FROM_HANDLE(radv_device, device, _device);
6043 	const VkExportSemaphoreCreateInfo *export =
6044 		vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
6045 	VkExternalSemaphoreHandleTypeFlags handleTypes =
6046 		export ? export->handleTypes : 0;
6047 	uint64_t initial_value = 0;
6048 	VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
6049 
6050 	struct radv_semaphore *sem = vk_alloc2(&device->vk.alloc, pAllocator,
6051 					       sizeof(*sem), 8,
6052 					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6053 	if (!sem)
6054 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6055 
6056 	vk_object_base_init(&device->vk, &sem->base,
6057 			    VK_OBJECT_TYPE_SEMAPHORE);
6058 
6059 	sem->temporary.kind = RADV_SEMAPHORE_NONE;
6060 	sem->permanent.kind = RADV_SEMAPHORE_NONE;
6061 
6062 	if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
6063 	    device->physical_device->rad_info.has_timeline_syncobj) {
6064 		int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
6065 		if (ret) {
6066 			radv_destroy_semaphore(device, pAllocator, sem);
6067 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6068 		}
6069 		device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
6070 		sem->permanent.timeline_syncobj.max_point = initial_value;
6071 		sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
6072 	} else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
6073 		radv_create_timeline(&sem->permanent.timeline, initial_value);
6074 		sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
6075 	} else if (device->always_use_syncobj || handleTypes) {
6076 		assert (device->physical_device->rad_info.has_syncobj);
6077 		int ret = device->ws->create_syncobj(device->ws, false,
6078 						     &sem->permanent.syncobj);
6079 		if (ret) {
6080 			radv_destroy_semaphore(device, pAllocator, sem);
6081 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6082 		}
6083 		sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
6084 	} else {
6085 		sem->permanent.ws_sem = device->ws->create_sem(device->ws);
6086 		if (!sem->permanent.ws_sem) {
6087 			radv_destroy_semaphore(device, pAllocator, sem);
6088 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6089 		}
6090 		sem->permanent.kind = RADV_SEMAPHORE_WINSYS;
6091 	}
6092 
6093 	*pSemaphore = radv_semaphore_to_handle(sem);
6094 	return VK_SUCCESS;
6095 }
6096 
6097 void radv_DestroySemaphore(
6098 	VkDevice                                    _device,
6099 	VkSemaphore                                 _semaphore,
6100 	const VkAllocationCallbacks*                pAllocator)
6101 {
6102 	RADV_FROM_HANDLE(radv_device, device, _device);
6103 	RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
6104 	if (!_semaphore)
6105 		return;
6106 
6107 	radv_destroy_semaphore(device, pAllocator, sem);
6108 }
6109 
6110 VkResult
6111 radv_GetSemaphoreCounterValue(VkDevice _device,
6112 			      VkSemaphore _semaphore,
6113 			      uint64_t* pValue)
6114 {
6115 	RADV_FROM_HANDLE(radv_device, device, _device);
6116 	RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
6117 
6118 	struct radv_semaphore_part *part =
6119 		semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
6120 
6121 	switch (part->kind) {
6122 	case RADV_SEMAPHORE_TIMELINE: {
6123 		pthread_mutex_lock(&part->timeline.mutex);
6124 		radv_timeline_gc_locked(device, &part->timeline);
6125 		*pValue = part->timeline.highest_signaled;
6126 		pthread_mutex_unlock(&part->timeline.mutex);
6127 		return VK_SUCCESS;
6128 	}
6129 	case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
6130 		return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
6131 	}
6132 	case RADV_SEMAPHORE_NONE:
6133 	case RADV_SEMAPHORE_SYNCOBJ:
6134 	case RADV_SEMAPHORE_WINSYS:
6135 		unreachable("Invalid semaphore type");
6136 	}
6137 	unreachable("Unhandled semaphore type");
6138 }
6139 
6140 
6141 static VkResult
6142 radv_wait_timelines(struct radv_device *device,
6143                     const VkSemaphoreWaitInfo* pWaitInfo,
6144                     uint64_t abs_timeout)
6145 {
6146 	if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
6147 		for (;;) {
6148 			for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6149 				RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6150 				VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
6151 
6152 				if (result == VK_SUCCESS)
6153 					return VK_SUCCESS;
6154 			}
6155 			if (radv_get_current_time() > abs_timeout)
6156 				return VK_TIMEOUT;
6157 		}
6158 	}
6159 
6160 	for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6161 		RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6162 		VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], abs_timeout);
6163 
6164 		if (result != VK_SUCCESS)
6165 			return result;
6166 	}
6167 	return VK_SUCCESS;
6168 }

6169 VkResult
6170 radv_WaitSemaphores(VkDevice _device,
6171 		    const VkSemaphoreWaitInfo* pWaitInfo,
6172 		    uint64_t timeout)
6173 {
6174 	RADV_FROM_HANDLE(radv_device, device, _device);
6175 	uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
6176 
6177 	if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind == RADV_SEMAPHORE_TIMELINE)
6178 		return radv_wait_timelines(device, pWaitInfo, abs_timeout);
6179 
6180 	if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
6181 		return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "semaphoreCount integer overflow");
6182 
6183 	bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
6184 	uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
6185 	if (!handles)
6186 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6187 
6188 	for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6189 		RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6190 		handles[i] = semaphore->permanent.syncobj;
6191 	}
6192 
6193 	bool success = device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
6194 	                                                 pWaitInfo->semaphoreCount, wait_all, false,
6195 	                                                 abs_timeout);
6196 	free(handles);
6197 	return success ? VK_SUCCESS : VK_TIMEOUT;
6198 }
6199 
6200 VkResult
6201 radv_SignalSemaphore(VkDevice _device,
6202                      const VkSemaphoreSignalInfo* pSignalInfo)
6203 {
6204 	RADV_FROM_HANDLE(radv_device, device, _device);
6205 	RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
6206 
6207 	struct radv_semaphore_part *part =
6208 		semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
6209 
6210 	switch(part->kind) {
6211 	case RADV_SEMAPHORE_TIMELINE: {
6212 		pthread_mutex_lock(&part->timeline.mutex);
6213 		radv_timeline_gc_locked(device, &part->timeline);
6214 		part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
6215 		part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
6216 
6217 		struct list_head processing_list;
6218 		list_inithead(&processing_list);
6219 		radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
6220 		pthread_mutex_unlock(&part->timeline.mutex);
6221 
6222 		VkResult result = radv_process_submissions(&processing_list);
6223 
6224 		/* This needs to happen after radv_process_submissions, so
6225 		 * that any submitted submissions that are now unblocked get
6226 		 * processed before we wake the application. This way we
6227 		 * ensure that any binary semaphores that are now unblocked
6228 		 * are usable by the application. */
6229 		pthread_cond_broadcast(&device->timeline_cond);
6230 
6231 		return result;
6232 	}
6233 	case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
6234 		part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
6235 		device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
6236 		break;
6237 	}
6238 	case RADV_SEMAPHORE_NONE:
6239 	case RADV_SEMAPHORE_SYNCOBJ:
6240 	case RADV_SEMAPHORE_WINSYS:
6241 		unreachable("Invalid semaphore type");
6242 	}
6243 	return VK_SUCCESS;
6244 }
6245 
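/*
 * Events are implemented as a host-visible 8-byte GTT buffer: the CPU
 * (and GPU packets elsewhere in the driver) simply write 1 for "set" and
 * 0 for "reset", and status checks read the mapping directly.
 */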
6246 static void radv_destroy_event(struct radv_device *device,
6247                                const VkAllocationCallbacks* pAllocator,
6248                                struct radv_event *event)
6249 {
6250 	if (event->bo)
6251 		device->ws->buffer_destroy(event->bo);
6252 
6253 	vk_object_base_finish(&event->base);
6254 	vk_free2(&device->vk.alloc, pAllocator, event);
6255 }
6256 
6257 VkResult radv_CreateEvent(
6258 	VkDevice                                    _device,
6259 	const VkEventCreateInfo*                    pCreateInfo,
6260 	const VkAllocationCallbacks*                pAllocator,
6261 	VkEvent*                                    pEvent)
6262 {
6263 	RADV_FROM_HANDLE(radv_device, device, _device);
6264 	struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator,
6265 					       sizeof(*event), 8,
6266 					       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6267 
6268 	if (!event)
6269 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6270 
6271 	vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
6272 
6273 	event->bo = device->ws->buffer_create(device->ws, 8, 8,
6274 					      RADEON_DOMAIN_GTT,
6275 					      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
6276 					      RADV_BO_PRIORITY_FENCE);
6277 	if (!event->bo) {
6278 		radv_destroy_event(device, pAllocator, event);
6279 		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6280 	}
6281 
6282 	event->map = (uint64_t*)device->ws->buffer_map(event->bo);
6283 	if (!event->map) {
6284 		radv_destroy_event(device, pAllocator, event);
6285 		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6286 	}
6287 
6288 	*pEvent = radv_event_to_handle(event);
6289 
6290 	return VK_SUCCESS;
6291 }
6292 
6293 void radv_DestroyEvent(
6294 	VkDevice                                    _device,
6295 	VkEvent                                     _event,
6296 	const VkAllocationCallbacks*                pAllocator)
6297 {
6298 	RADV_FROM_HANDLE(radv_device, device, _device);
6299 	RADV_FROM_HANDLE(radv_event, event, _event);
6300 
6301 	if (!event)
6302 		return;
6303 
6304 	radv_destroy_event(device, pAllocator, event);
6305 }
6306 
6307 VkResult radv_GetEventStatus(
6308 	VkDevice                                    _device,
6309 	VkEvent                                     _event)
6310 {
6311 	RADV_FROM_HANDLE(radv_event, event, _event);
6312 
6313 	if (*event->map == 1)
6314 		return VK_EVENT_SET;
6315 	return VK_EVENT_RESET;
6316 }
6317 
6318 VkResult radv_SetEvent(
6319 	VkDevice                                    _device,
6320 	VkEvent                                     _event)
6321 {
6322 	RADV_FROM_HANDLE(radv_event, event, _event);
6323 	*event->map = 1;
6324 
6325 	return VK_SUCCESS;
6326 }
6327 
6328 VkResult radv_ResetEvent(
6329     VkDevice                                    _device,
6330     VkEvent                                     _event)
6331 {
6332 	RADV_FROM_HANDLE(radv_event, event, _event);
6333 	*event->map = 0;
6334 
6335 	return VK_SUCCESS;
6336 }
6337 
6338 static void
6339 radv_destroy_buffer(struct radv_device *device,
6340 		    const VkAllocationCallbacks *pAllocator,
6341 		    struct radv_buffer *buffer)
6342 {
6343 	if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
6344 		device->ws->buffer_destroy(buffer->bo);
6345 
6346 	vk_object_base_finish(&buffer->base);
6347 	vk_free2(&device->vk.alloc, pAllocator, buffer);
6348 }
6349 
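/*
 * Creating a buffer allocates no memory of its own; the BO comes later
 * from vkBindBufferMemory(). The exception is sparse buffers, which get
 * a virtual BO (an address-space reservation only) rounded up to a 4 KiB
 * multiple at creation time.
 */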
6350 VkResult radv_CreateBuffer(
6351 	VkDevice                                    _device,
6352 	const VkBufferCreateInfo*                   pCreateInfo,
6353 	const VkAllocationCallbacks*                pAllocator,
6354 	VkBuffer*                                   pBuffer)
6355 {
6356 	RADV_FROM_HANDLE(radv_device, device, _device);
6357 	struct radv_buffer *buffer;
6358 
6359 	if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
6360 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
6361 
6362 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
6363 
6364 	buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
6365 			     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6366 	if (buffer == NULL)
6367 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6368 
6369 	vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
6370 
6371 	buffer->size = pCreateInfo->size;
6372 	buffer->usage = pCreateInfo->usage;
6373 	buffer->bo = NULL;
6374 	buffer->offset = 0;
6375 	buffer->flags = pCreateInfo->flags;
6376 
6377 	buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
6378 						 EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
6379 
6380 	if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
6381 		buffer->bo = device->ws->buffer_create(device->ws,
6382 		                                       align64(buffer->size, 4096),
6383 		                                       4096, 0, RADEON_FLAG_VIRTUAL,
6384 		                                       RADV_BO_PRIORITY_VIRTUAL);
6385 		if (!buffer->bo) {
6386 			radv_destroy_buffer(device, pAllocator, buffer);
6387 			return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6388 		}
6389 	}
6390 
6391 	*pBuffer = radv_buffer_to_handle(buffer);
6392 
6393 	return VK_SUCCESS;
6394 }
6395 
6396 void radv_DestroyBuffer(
6397 	VkDevice                                    _device,
6398 	VkBuffer                                    _buffer,
6399 	const VkAllocationCallbacks*                pAllocator)
6400 {
6401 	RADV_FROM_HANDLE(radv_device, device, _device);
6402 	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
6403 
6404 	if (!buffer)
6405 		return;
6406 
6407 	radv_destroy_buffer(device, pAllocator, buffer);
6408 }
6409 
6410 VkDeviceAddress radv_GetBufferDeviceAddress(
6411 	VkDevice                                    device,
6412 	const VkBufferDeviceAddressInfo*         pInfo)
6413 {
6414 	RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
6415 	return radv_buffer_get_va(buffer->bo) + buffer->offset;
6416 }
6417 
6418 
6419 uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device,
6420 					    const VkBufferDeviceAddressInfo* pInfo)
6421 {
6422 	return 0;
6423 }
6424 
6425 uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
6426 						  const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
6427 {
6428 	return 0;
6429 }
6430 
6431 static inline unsigned
6432 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
6433 {
6434 	if (stencil)
6435 		return plane->surface.u.legacy.stencil_tiling_index[level];
6436 	else
6437 		return plane->surface.u.legacy.tiling_index[level];
6438 }
6439 
6440 static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
6441 {
6442 	return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
6443 }
6444 
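/*
 * Compute the CB_DCC_CONTROL value for a color view. The block-size and
 * independent-block settings trade compression ratio against the texture
 * unit's ability to read the compressed surface; see the per-generation
 * cases below.
 */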
6445 static uint32_t
6446 radv_init_dcc_control_reg(struct radv_device *device,
6447 			  struct radv_image_view *iview)
6448 {
6449 	unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
6450 	unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
6451 	unsigned max_compressed_block_size;
6452 	unsigned independent_128b_blocks;
6453 	unsigned independent_64b_blocks;
6454 
6455 	if (!radv_dcc_enabled(iview->image, iview->base_mip))
6456 		return 0;
6457 
6458 	if (!device->physical_device->rad_info.has_dedicated_vram) {
6459 		/* amdvlk: [min-compressed-block-size] should be set to 32 for
6460 		 * dGPU and 64 for APU because all of our APUs to date use
6461 		 * DIMMs which have a request granularity size of 64B while all
6462 		 * other chips have a 32B request size.
6463 		 */
6464 		min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
6465 	}
6466 
6467 	if (device->physical_device->rad_info.chip_class >= GFX10) {
6468 		max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
6469 		independent_64b_blocks = 0;
6470 		independent_128b_blocks = 1;
6471 	} else {
6472 		independent_128b_blocks = 0;
6473 
6474 		if (iview->image->info.samples > 1) {
6475 			if (iview->image->planes[0].surface.bpe == 1)
6476 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
6477 			else if (iview->image->planes[0].surface.bpe == 2)
6478 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
6479 		}
6480 
6481 		if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
6482 					   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
6483 					   VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
6484 			/* If this DCC image is potentially going to be used in texture
6485 			 * fetches, we need some special settings.
6486 			 */
6487 			independent_64b_blocks = 1;
6488 			max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
6489 		} else {
6490 			/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
6491 			 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
6492 			 * big as possible for better compression state.
6493 			 */
6494 			independent_64b_blocks = 0;
6495 			max_compressed_block_size = max_uncompressed_block_size;
6496 		}
6497 	}
6498 
6499 	return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
6500 	       S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
6501 	       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
6502 	       S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
6503 	       S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
6504 }
6505 
6506 void
6507 radv_initialise_color_surface(struct radv_device *device,
6508 			      struct radv_color_buffer_info *cb,
6509 			      struct radv_image_view *iview)
6510 {
6511 	const struct vk_format_description *desc;
6512 	unsigned ntype, format, swap, endian;
6513 	unsigned blend_clamp = 0, blend_bypass = 0;
6514 	uint64_t va;
6515 	const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
6516 	const struct radeon_surf *surf = &plane->surface;
6517 
6518 	desc = vk_format_description(iview->vk_format);
6519 
6520 	memset(cb, 0, sizeof(*cb));
6521 
6522 	/* Intensity is implemented as Red, so treat it that way. */
6523 	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
6524 
6525 	va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;
6526 
6527 	cb->cb_color_base = va >> 8;
6528 
6529 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6530 		if (device->physical_device->rad_info.chip_class >= GFX10) {
6531 			cb->cb_color_attrib3 |=	S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
6532 				S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
6533 				S_028EE0_CMASK_PIPE_ALIGNED(1) |
6534 				S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
6535 		} else {
6536 			struct gfx9_surf_meta_flags meta = {
6537 				.rb_aligned = 1,
6538 				.pipe_aligned = 1,
6539 			};
6540 
6541 			if (surf->dcc_offset)
6542 				meta = surf->u.gfx9.dcc;
6543 
6544 			cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
6545 				S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
6546 				S_028C74_RB_ALIGNED(meta.rb_aligned) |
6547 				S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
6548 			cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
6549 		}
6550 
6551 		cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
6552 		cb->cb_color_base |= surf->tile_swizzle;
6553 	} else {
6554 		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
6555 		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
6556 
6557 		cb->cb_color_base += level_info->offset >> 8;
6558 		if (level_info->mode == RADEON_SURF_MODE_2D)
6559 			cb->cb_color_base |= surf->tile_swizzle;
6560 
6561 		pitch_tile_max = level_info->nblk_x / 8 - 1;
6562 		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
6563 		tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
6564 
6565 		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
6566 		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
6567 		cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
6568 
6569 		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
6570 
6571 		if (radv_image_has_fmask(iview->image)) {
6572 			if (device->physical_device->rad_info.chip_class >= GFX7)
6573 				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
6574 			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
6575 			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
6576 		} else {
6577 			/* This must be set for fast clear to work without FMASK. */
6578 			if (device->physical_device->rad_info.chip_class >= GFX7)
6579 				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
6580 			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
6581 			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
6582 		}
6583 	}
6584 
6585 	/* CMASK variables */
6586 	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
6587 	va += surf->cmask_offset;
6588 	cb->cb_color_cmask = va >> 8;
6589 
6590 	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
6591 	va += surf->dcc_offset;
6592 
6593 	if (radv_dcc_enabled(iview->image, iview->base_mip) &&
6594 	    device->physical_device->rad_info.chip_class <= GFX8)
6595 		va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
6596 
6597 	unsigned dcc_tile_swizzle = surf->tile_swizzle;
6598 	dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
6599 
6600 	cb->cb_dcc_base = va >> 8;
6601 	cb->cb_dcc_base |= dcc_tile_swizzle;
6602 
6603 	/* GFX10 field has the same base shift as the GFX6 field. */
6604 	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6605 	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
6606 		S_028C6C_SLICE_MAX_GFX10(max_slice);
6607 
6608 	if (iview->image->info.samples > 1) {
6609 		unsigned log_samples = util_logbase2(iview->image->info.samples);
6610 
6611 		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
6612 			S_028C74_NUM_FRAGMENTS(log_samples);
6613 	}
6614 
6615 	if (radv_image_has_fmask(iview->image)) {
6616 		va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
6617 		cb->cb_color_fmask = va >> 8;
6618 		cb->cb_color_fmask |= surf->fmask_tile_swizzle;
6619 	} else {
6620 		cb->cb_color_fmask = cb->cb_color_base;
6621 	}
6622 
6623 	ntype = radv_translate_color_numformat(iview->vk_format,
6624 					       desc,
6625 					       vk_format_get_first_non_void_channel(iview->vk_format));
6626 	format = radv_translate_colorformat(iview->vk_format);
6627 	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
6628 		radv_finishme("Illegal color\n");
6629 	swap = radv_translate_colorswap(iview->vk_format, false);
6630 	endian = radv_colorformat_endian_swap(format);
6631 
6632 	/* blend clamp should be set for all NORM/SRGB types */
6633 	if (ntype == V_028C70_NUMBER_UNORM ||
6634 	    ntype == V_028C70_NUMBER_SNORM ||
6635 	    ntype == V_028C70_NUMBER_SRGB)
6636 		blend_clamp = 1;
6637 
6638 	/* set blend bypass according to docs if SINT/UINT or
6639 	   8/24 COLOR variants */
6640 	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
6641 	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
6642 	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
6643 		blend_clamp = 0;
6644 		blend_bypass = 1;
6645 	}
6646 #if 0
6647 	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
6648 	    (format == V_028C70_COLOR_8 ||
6649 	     format == V_028C70_COLOR_8_8 ||
6650 	     format == V_028C70_COLOR_8_8_8_8))
6651 		->color_is_int8 = true;
6652 #endif
6653 	cb->cb_color_info = S_028C70_FORMAT(format) |
6654 		S_028C70_COMP_SWAP(swap) |
6655 		S_028C70_BLEND_CLAMP(blend_clamp) |
6656 		S_028C70_BLEND_BYPASS(blend_bypass) |
6657 		S_028C70_SIMPLE_FLOAT(1) |
6658 		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
6659 				    ntype != V_028C70_NUMBER_SNORM &&
6660 				    ntype != V_028C70_NUMBER_SRGB &&
6661 				    format != V_028C70_COLOR_8_24 &&
6662 				    format != V_028C70_COLOR_24_8) |
6663 		S_028C70_NUMBER_TYPE(ntype) |
6664 		S_028C70_ENDIAN(endian);
6665 	if (radv_image_has_fmask(iview->image)) {
6666 		cb->cb_color_info |= S_028C70_COMPRESSION(1);
6667 		if (device->physical_device->rad_info.chip_class == GFX6) {
6668 			unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
6669 			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
6670 		}
6671 
6672 		if (radv_image_is_tc_compat_cmask(iview->image)) {
6673 			/* Allow the texture block to read FMASK directly
6674 			 * without decompressing it. This bit must be cleared
6675 			 * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
6676 			 * otherwise the operation doesn't happen.
6677 			 */
6678 			cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
6679 
6680 			/* Set CMASK into a tiling format that allows the
6681 			 * texture block to read it.
6682 			 */
6683 			cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
6684 		}
6685 	}
6686 
6687 	if (radv_image_has_cmask(iview->image) &&
6688 	    !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
6689 		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
6690 
6691 	if (radv_dcc_enabled(iview->image, iview->base_mip))
6692 		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
6693 
6694 	cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
6695 
6696 	/* This must be set for fast clear to work without FMASK. */
6697 	if (!radv_image_has_fmask(iview->image) &&
6698 	    device->physical_device->rad_info.chip_class == GFX6) {
6699 		unsigned bankh = util_logbase2(surf->u.legacy.bankh);
6700 		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
6701 	}
6702 
6703 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6704 		const struct vk_format_description *format_desc = vk_format_description(iview->image->vk_format);
6705 
6706 		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
6707 		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
6708 		unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
6709 		unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);
6710 
6711 		if (device->physical_device->rad_info.chip_class >= GFX10) {
6712 			cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
6713 
6714 			cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
6715 					        S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
6716 					        S_028EE0_RESOURCE_LEVEL(1);
6717 		} else {
6718 			cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
6719 			cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
6720 					       S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
6721 		}
6722 
6723 		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
6724 			S_028C68_MIP0_HEIGHT(height - 1) |
6725 			S_028C68_MAX_MIP(iview->image->info.levels - 1);
6726 	}
6727 }
6728 
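/* Summary of the DECOMPRESS_ON_N_ZPLANES values computed below (derived
 * from the code, listed here for quick reference):
 *
 *   GFX9+ : D16 with MSAA -> 3, everything else -> 5 (max_zplanes + 1)
 *   GFX6-8: D16 -> 1; single sample -> 5; 2-4 samples -> 3; 8 samples -> 2
 */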
6729 static unsigned
6730 radv_calc_decompress_on_z_planes(struct radv_device *device,
6731 				 struct radv_image_view *iview)
6732 {
6733 	unsigned max_zplanes = 0;
6734 
6735 	assert(radv_image_is_tc_compat_htile(iview->image));
6736 
6737 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6738 		/* Default value for 32-bit depth surfaces. */
6739 		max_zplanes = 4;
6740 
6741 		if (iview->vk_format == VK_FORMAT_D16_UNORM &&
6742 		    iview->image->info.samples > 1)
6743 			max_zplanes = 2;
6744 
6745 		max_zplanes = max_zplanes + 1;
6746 	} else {
6747 		if (iview->vk_format == VK_FORMAT_D16_UNORM) {
6748 			/* Do not enable Z plane compression for 16-bit depth
6749 			 * surfaces because it isn't supported on GFX8. Only
6750 			 * 32-bit depth surfaces are supported by the hardware.
6751 			 * This maintains shader compatibility and reduces the
6752 			 * number of depth decompressions.
6753 			 */
6754 			max_zplanes = 1;
6755 		} else {
6756 			if (iview->image->info.samples <= 1)
6757 				max_zplanes = 5;
6758 			else if (iview->image->info.samples <= 4)
6759 				max_zplanes = 3;
6760 			else
6761 				max_zplanes = 2;
6762 		}
6763 	}
6764 
6765 	return max_zplanes;
6766 }
6767 
6768 void
6769 radv_initialise_ds_surface(struct radv_device *device,
6770 			   struct radv_ds_buffer_info *ds,
6771 			   struct radv_image_view *iview)
6772 {
6773 	unsigned level = iview->base_mip;
6774 	unsigned format, stencil_format;
6775 	uint64_t va, s_offs, z_offs;
6776 	bool stencil_only = false;
6777 	const struct radv_image_plane *plane = &iview->image->planes[0];
6778 	const struct radeon_surf *surf = &plane->surface;
6779 
6780 	assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
6781 
6782 	memset(ds, 0, sizeof(*ds));
6783 	switch (iview->image->vk_format) {
6784 	case VK_FORMAT_D24_UNORM_S8_UINT:
6785 	case VK_FORMAT_X8_D24_UNORM_PACK32:
6786 		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
6787 		ds->offset_scale = 2.0f;
6788 		break;
6789 	case VK_FORMAT_D16_UNORM:
6790 	case VK_FORMAT_D16_UNORM_S8_UINT:
6791 		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6792 		ds->offset_scale = 4.0f;
6793 		break;
6794 	case VK_FORMAT_D32_SFLOAT:
6795 	case VK_FORMAT_D32_SFLOAT_S8_UINT:
6796 		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
6797 			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
6798 		ds->offset_scale = 1.0f;
6799 		break;
6800 	case VK_FORMAT_S8_UINT:
6801 		stencil_only = true;
6802 		break;
6803 	default:
6804 		break;
6805 	}
6806 
6807 	format = radv_translate_dbformat(iview->image->vk_format);
6808 	stencil_format = surf->has_stencil ?
6809 		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
6810 
6811 	uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6812 	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
6813 		S_028008_SLICE_MAX(max_slice);
6814 	if (device->physical_device->rad_info.chip_class >= GFX10) {
6815 		ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
6816 				     S_028008_SLICE_MAX_HI(max_slice >> 11);
6817 	}
6818 
6819 	ds->db_htile_data_base = 0;
6820 	ds->db_htile_surface = 0;
6821 
6822 	va = radv_buffer_get_va(iview->bo) + iview->image->offset;
6823 	s_offs = z_offs = va;
6824 
6825 	if (device->physical_device->rad_info.chip_class >= GFX9) {
6826 		assert(surf->u.gfx9.surf_offset == 0);
6827 		s_offs += surf->u.gfx9.stencil_offset;
6828 
6829 		ds->db_z_info = S_028038_FORMAT(format) |
6830 			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
6831 			S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
6832 			S_028038_MAXMIP(iview->image->info.levels - 1) |
6833 			S_028038_ZRANGE_PRECISION(1);
6834 		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
6835 			S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
6836 
6837 		if (device->physical_device->rad_info.chip_class == GFX9) {
6838 			ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
6839 			ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
6840 		}
6841 
6842 		ds->db_depth_view |= S_028008_MIPID(level);
6843 		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
6844 			S_02801C_Y_MAX(iview->image->info.height - 1);
6845 
6846 		if (radv_htile_enabled(iview->image, level)) {
6847 			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
6848 
6849 			if (radv_image_is_tc_compat_htile(iview->image)) {
6850 				unsigned max_zplanes =
6851 					radv_calc_decompress_on_z_planes(device, iview);
6852 
6853 				ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6854 
6855 				if (device->physical_device->rad_info.chip_class >= GFX10) {
6856 					ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
6857 					ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
6858 				} else {
6859 					ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
6860 					ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
6861 				}
6862 			}
6863 
6864 			if (!surf->has_stencil)
6865 				/* Use all of the htile_buffer for depth if there's no stencil. */
6866 				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
6867 			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
6868 				surf->htile_offset;
6869 			ds->db_htile_data_base = va >> 8;
6870 			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
6871 				S_028ABC_PIPE_ALIGNED(1);
6872 
6873 			if (device->physical_device->rad_info.chip_class == GFX9) {
6874 				ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
6875 			}
6876 		}
6877 	} else {
6878 		const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
6879 
6880 		if (stencil_only)
6881 			level_info = &surf->u.legacy.stencil_level[level];
6882 
6883 		z_offs += surf->u.legacy.level[level].offset;
6884 		s_offs += surf->u.legacy.stencil_level[level].offset;
6885 
6886 		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
6887 		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
6888 		ds->db_stencil_info = S_028044_FORMAT(stencil_format);
6889 
6890 		if (iview->image->info.samples > 1)
6891 			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
6892 
6893 		if (device->physical_device->rad_info.chip_class >= GFX7) {
6894 			struct radeon_info *info = &device->physical_device->rad_info;
6895 			unsigned tiling_index = surf->u.legacy.tiling_index[level];
6896 			unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
6897 			unsigned macro_index = surf->u.legacy.macro_tile_index;
6898 			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
6899 			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
6900 			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
6901 
6902 			if (stencil_only)
6903 				tile_mode = stencil_tile_mode;
6904 
6905 			ds->db_depth_info |=
6906 				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
6907 				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
6908 				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
6909 				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
6910 				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
6911 				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
6912 			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
6913 			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
6914 		} else {
6915 			unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
6916 			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
6917 			tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
6918 			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
6919 			if (stencil_only)
6920 				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
6921 		}
6922 
6923 		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
6924 			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
6925 		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
6926 
6927 		if (radv_htile_enabled(iview->image, level)) {
6928 			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
6929 
6930 			if (!surf->has_stencil &&
6931 			    !radv_image_is_tc_compat_htile(iview->image))
6932 				/* Use all of the htile_buffer for depth if there's no stencil. */
6933 				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
6934 
6935 			va = radv_buffer_get_va(iview->bo) + iview->image->offset +
6936 				surf->htile_offset;
6937 			ds->db_htile_data_base = va >> 8;
6938 			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
6939 
6940 			if (radv_image_is_tc_compat_htile(iview->image)) {
6941 				unsigned max_zplanes =
6942 					radv_calc_decompress_on_z_planes(device, iview);
6943 
6944 				ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
6945 				ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6946 			}
6947 		}
6948 	}
6949 
6950 	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
6951 	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
6952 }
6953 
6954 VkResult radv_CreateFramebuffer(
6955 	VkDevice                                    _device,
6956 	const VkFramebufferCreateInfo*              pCreateInfo,
6957 	const VkAllocationCallbacks*                pAllocator,
6958 	VkFramebuffer*                              pFramebuffer)
6959 {
6960 	RADV_FROM_HANDLE(radv_device, device, _device);
6961 	struct radv_framebuffer *framebuffer;
6962 	const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
6963 		vk_find_struct_const(pCreateInfo->pNext,
6964 			FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
6965 
6966 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
6967 
6968 	size_t size = sizeof(*framebuffer);
6969 	if (!imageless_create_info)
6970 		size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
6971 	framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
6972 				  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6973 	if (framebuffer == NULL)
6974 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
6975 
6976 	vk_object_base_init(&device->vk, &framebuffer->base,
6977 			    VK_OBJECT_TYPE_FRAMEBUFFER);
6978 
6979 	framebuffer->attachment_count = pCreateInfo->attachmentCount;
6980 	framebuffer->width = pCreateInfo->width;
6981 	framebuffer->height = pCreateInfo->height;
6982 	framebuffer->layers = pCreateInfo->layers;
6983 	if (imageless_create_info) {
6984 		for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
6985 			const VkFramebufferAttachmentImageInfo *attachment =
6986 				imageless_create_info->pAttachmentImageInfos + i;
6987 			framebuffer->width = MIN2(framebuffer->width, attachment->width);
6988 			framebuffer->height = MIN2(framebuffer->height, attachment->height);
6989 			framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
6990 		}
6991 	} else {
6992 		for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
6993 			VkImageView _iview = pCreateInfo->pAttachments[i];
6994 			struct radv_image_view *iview = radv_image_view_from_handle(_iview);
6995 			framebuffer->attachments[i] = iview;
6996 			framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
6997 			framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
6998 			framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
6999 		}
7000 	}
7001 
7002 	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
7003 	return VK_SUCCESS;
7004 }
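/* A minimal usage sketch (illustrative only; color_format and pass are
 * assumed handles, not part of this file): an imageless framebuffer takes
 * the VkFramebufferAttachmentsCreateInfo path above, so no attachment
 * pointers are stored at creation time:
 *
 *	VkFramebufferAttachmentImageInfo att = {
 *		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENT_IMAGE_INFO,
 *		.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
 *		.width = 1920, .height = 1080, .layerCount = 1,
 *		.viewFormatCount = 1, .pViewFormats = &color_format,
 *	};
 *	VkFramebufferAttachmentsCreateInfo atts = {
 *		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO,
 *		.attachmentImageInfoCount = 1,
 *		.pAttachmentImageInfos = &att,
 *	};
 *	VkFramebufferCreateInfo fb_info = {
 *		.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
 *		.pNext = &atts,
 *		.flags = VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT,
 *		.renderPass = pass,
 *		.attachmentCount = 1,
 *		.width = 1920, .height = 1080, .layers = 1,
 *	};
 */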
7005 
7006 void radv_DestroyFramebuffer(
7007 	VkDevice                                    _device,
7008 	VkFramebuffer                               _fb,
7009 	const VkAllocationCallbacks*                pAllocator)
7010 {
7011 	RADV_FROM_HANDLE(radv_device, device, _device);
7012 	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
7013 
7014 	if (!fb)
7015 		return;
7016 	vk_object_base_finish(&fb->base);
7017 	vk_free2(&device->vk.alloc, pAllocator, fb);
7018 }
7019 
7020 static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
7021 {
7022 	switch (address_mode) {
7023 	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
7024 		return V_008F30_SQ_TEX_WRAP;
7025 	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
7026 		return V_008F30_SQ_TEX_MIRROR;
7027 	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
7028 		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
7029 	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
7030 		return V_008F30_SQ_TEX_CLAMP_BORDER;
7031 	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
7032 		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
7033 	default:
7034 		unreachable("illegal tex wrap mode");
7035 		break;
7036 	}
7037 }
7038 
7039 static unsigned
7040 radv_tex_compare(VkCompareOp op)
7041 {
7042 	switch (op) {
7043 	case VK_COMPARE_OP_NEVER:
7044 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7045 	case VK_COMPARE_OP_LESS:
7046 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
7047 	case VK_COMPARE_OP_EQUAL:
7048 		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
7049 	case VK_COMPARE_OP_LESS_OR_EQUAL:
7050 		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
7051 	case VK_COMPARE_OP_GREATER:
7052 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
7053 	case VK_COMPARE_OP_NOT_EQUAL:
7054 		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
7055 	case VK_COMPARE_OP_GREATER_OR_EQUAL:
7056 		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
7057 	case VK_COMPARE_OP_ALWAYS:
7058 		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
7059 	default:
7060 		unreachable("illegal compare mode");
7061 		break;
7062 	}
7063 }
7064 
7065 static unsigned
7066 radv_tex_filter(VkFilter filter, unsigned max_aniso)
7067 {
7068 	switch (filter) {
7069 	case VK_FILTER_NEAREST:
7070 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
7071 			V_008F38_SQ_TEX_XY_FILTER_POINT);
7072 	case VK_FILTER_LINEAR:
7073 		return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
7074 			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
7075 	case VK_FILTER_CUBIC_IMG:
7076 	default:
7077 		fprintf(stderr, "illegal texture filter\n");
7078 		return 0;
7079 	}
7080 }
7081 
7082 static unsigned
7083 radv_tex_mipfilter(VkSamplerMipmapMode mode)
7084 {
7085 	switch (mode) {
7086 	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
7087 		return V_008F38_SQ_TEX_Z_FILTER_POINT;
7088 	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
7089 		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
7090 	default:
7091 		return V_008F38_SQ_TEX_Z_FILTER_NONE;
7092 	}
7093 }
7094 
7095 static unsigned
7096 radv_tex_bordercolor(VkBorderColor bcolor)
7097 {
7098 	switch (bcolor) {
7099 	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
7100 	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
7101 		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
7102 	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
7103 	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
7104 		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
7105 	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
7106 	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
7107 		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
7108 	case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
7109 	case VK_BORDER_COLOR_INT_CUSTOM_EXT:
7110 		return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
7111 	default:
7112 		break;
7113 	}
7114 	return 0;
7115 }
7116 
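/* Maps a maximum anisotropy sample count to the hardware's log2 encoding;
 * effectively floor(log2(filter)) clamped to the 1x-16x range:
 *
 *   filter  0-1 -> 0 (aniso disabled)
 *   filter  2-3 -> 1 (2x)
 *   filter  4-7 -> 2 (4x)
 *   filter 8-15 -> 3 (8x)
 *   filter 16+  -> 4 (16x)
 */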
7117 static unsigned
7118 radv_tex_aniso_filter(unsigned filter)
7119 {
7120 	if (filter < 2)
7121 		return 0;
7122 	if (filter < 4)
7123 		return 1;
7124 	if (filter < 8)
7125 		return 2;
7126 	if (filter < 16)
7127 		return 3;
7128 	return 4;
7129 }
7130 
7131 static unsigned
7132 radv_tex_filter_mode(VkSamplerReductionMode mode)
7133 {
7134 	switch (mode) {
7135 	case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
7136 		return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7137 	case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
7138 		return V_008F30_SQ_IMG_FILTER_MODE_MIN;
7139 	case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
7140 		return V_008F30_SQ_IMG_FILTER_MODE_MAX;
7141 	default:
7142 		break;
7143 	}
7144 	return 0;
7145 }
7146 
7147 static uint32_t
7148 radv_get_max_anisotropy(struct radv_device *device,
7149 			const VkSamplerCreateInfo *pCreateInfo)
7150 {
7151 	if (device->force_aniso >= 0)
7152 		return device->force_aniso;
7153 
7154 	if (pCreateInfo->anisotropyEnable &&
7155 	    pCreateInfo->maxAnisotropy > 1.0f)
7156 		return (uint32_t)pCreateInfo->maxAnisotropy;
7157 
7158 	return 0;
7159 }
7160 
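/* Converts a float to signed fixed point with frac_bits fractional bits,
 * e.g. S_FIXED(1.0f, 8) == 0x100 and S_FIXED(-16.0f, 8) == -4096; the
 * fractional remainder is truncated toward zero by the float-to-int cast.
 */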
7161 static inline int S_FIXED(float value, unsigned frac_bits)
7162 {
7163 	return value * (1 << frac_bits);
7164 }
7165 
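/* Reserves a slot in the device's custom border color table and uploads
 * the value to the GPU buffer. Returns the slot index, or
 * RADV_BORDER_COLOR_COUNT if every slot is already in use; the caller is
 * expected to release the slot with radv_unregister_border_color().
 */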
7166 static uint32_t radv_register_border_color(struct radv_device *device,
7167 					   VkClearColorValue   value)
7168 {
7169 	uint32_t slot;
7170 
7171 	pthread_mutex_lock(&device->border_color_data.mutex);
7172 
7173 	for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
7174 		if (!device->border_color_data.used[slot]) {
7175 			/* Copy to the GPU wrt endian-ness. */
7176 			util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot],
7177 						&value,
7178 						sizeof(VkClearColorValue));
7179 
7180 			device->border_color_data.used[slot] = true;
7181 			break;
7182 		}
7183 	}
7184 
7185 	pthread_mutex_unlock(&device->border_color_data.mutex);
7186 
7187 	return slot;
7188 }
7189 
7190 static void radv_unregister_border_color(struct radv_device *device,
7191 					 uint32_t            slot)
7192 {
7193 	pthread_mutex_lock(&device->border_color_data.mutex);
7194 
7195 	device->border_color_data.used[slot] = false;
7196 
7197 	pthread_mutex_unlock(&device->border_color_data.mutex);
7198 }
7199 
7200 static void
7201 radv_init_sampler(struct radv_device *device,
7202 		  struct radv_sampler *sampler,
7203 		  const VkSamplerCreateInfo *pCreateInfo)
7204 {
7205 	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
7206 	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
7207 	bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
7208 			   device->physical_device->rad_info.chip_class == GFX9;
7209 	unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7210 	unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7211 	bool trunc_coord = pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
7212 	bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7213 				 pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7214 				 pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
7215 	VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7216 	uint32_t border_color_ptr;
7217 
7218 	const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
7219 		vk_find_struct_const(pCreateInfo->pNext,
7220 				     SAMPLER_REDUCTION_MODE_CREATE_INFO);
7221 	if (sampler_reduction)
7222 		filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
7223 
7224 	if (pCreateInfo->compareEnable)
7225 		depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
7226 
7227 	sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
7228 
7229 	if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
7230 		const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
7231 			vk_find_struct_const(pCreateInfo->pNext,
7232 					     SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
7233 
7234 		assert(custom_border_color);
7235 
7236 		sampler->border_color_slot =
7237 			radv_register_border_color(device, custom_border_color->customBorderColor);
7238 
7239 		/* Did we fail to find a slot? */
7240 		if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
7241 			fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
7242 			border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7243 		}
7244 	}
7245 
7246 	/* If we don't have a custom color, set the ptr to 0 */
7247 	border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT
7248 		? sampler->border_color_slot
7249 		: 0;
7250 
7251 	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
7252 			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
7253 			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
7254 			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
7255 			     S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
7256 			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
7257 			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
7258 			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
7259 			     S_008F30_DISABLE_CUBE_WRAP(0) |
7260 			     S_008F30_COMPAT_MODE(compat_mode) |
7261 			     S_008F30_FILTER_MODE(filter_mode) |
7262 			     S_008F30_TRUNC_COORD(trunc_coord));
7263 	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
7264 			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
7265 			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
7266 	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
7267 			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
7268 			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
7269 			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
7270 			     S_008F38_MIP_POINT_PRECLAMP(0));
7271 	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
7272 			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
7273 
7274 	if (device->physical_device->rad_info.chip_class >= GFX10) {
7275 		sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
7276 	} else {
7277 		sampler->state[2] |=
7278 			S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
7279 			S_008F38_FILTER_PREC_FIX(1) |
7280 			S_008F38_ANISO_OVERRIDE_GFX6(device->physical_device->rad_info.chip_class >= GFX8);
7281 	}
7282 }
7283 
7284 VkResult radv_CreateSampler(
7285 	VkDevice                                    _device,
7286 	const VkSamplerCreateInfo*                  pCreateInfo,
7287 	const VkAllocationCallbacks*                pAllocator,
7288 	VkSampler*                                  pSampler)
7289 {
7290 	RADV_FROM_HANDLE(radv_device, device, _device);
7291 	struct radv_sampler *sampler;
7292 
7293 	const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
7294 		vk_find_struct_const(pCreateInfo->pNext,
7295 				     SAMPLER_YCBCR_CONVERSION_INFO);
7296 
7297 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
7298 
7299 	sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
7300 			      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7301 	if (!sampler)
7302 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7303 
7304 	vk_object_base_init(&device->vk, &sampler->base,
7305 			    VK_OBJECT_TYPE_SAMPLER);
7306 
7307 	radv_init_sampler(device, sampler, pCreateInfo);
7308 
7309 	sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
7310 	*pSampler = radv_sampler_to_handle(sampler);
7311 
7312 	return VK_SUCCESS;
7313 }
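/* A minimal usage sketch (illustrative only, assuming a valid VkDevice):
 * creating a sampler with a custom border color, which exercises the
 * border color slot allocation above. These are the standard
 * VK_EXT_custom_border_color structures, not RADV-specific API:
 *
 *	VkSamplerCustomBorderColorCreateInfoEXT custom = {
 *		.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
 *		.customBorderColor.float32 = { 0.25f, 0.5f, 0.75f, 1.0f },
 *		.format = VK_FORMAT_R8G8B8A8_UNORM,
 *	};
 *	VkSamplerCreateInfo info = {
 *		.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *		.pNext = &custom,
 *		.magFilter = VK_FILTER_LINEAR,
 *		.minFilter = VK_FILTER_LINEAR,
 *		.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *		.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *		.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *		.borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT,
 *	};
 *	VkSampler sampler;
 *	VkResult result = vkCreateSampler(device, &info, NULL, &sampler);
 */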
7314 
7315 void radv_DestroySampler(
7316 	VkDevice                                    _device,
7317 	VkSampler                                   _sampler,
7318 	const VkAllocationCallbacks*                pAllocator)
7319 {
7320 	RADV_FROM_HANDLE(radv_device, device, _device);
7321 	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
7322 
7323 	if (!sampler)
7324 		return;
7325 
7326 	if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
7327 		radv_unregister_border_color(device, sampler->border_color_slot);
7328 
7329 	vk_object_base_finish(&sampler->base);
7330 	vk_free2(&device->vk.alloc, pAllocator, sampler);
7331 }
7332 
7333 /* vk_icd.h does not declare this function, so we declare it here to
7334  * suppress Wmissing-prototypes.
7335  */
7336 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
7337 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
7338 
7339 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
7340 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
7341 {
7342 	/* For the full details on loader interface versioning, see
7343 	* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
7344 	* What follows is a condensed summary, to help you navigate the large and
7345 	* confusing official doc.
7346 	*
7347 	*   - Loader interface v0 is incompatible with later versions. We don't
7348 	*     support it.
7349 	*
7350 	*   - In loader interface v1:
7351 	*       - The first ICD entrypoint called by the loader is
7352 	*         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
7353 	*         entrypoint.
7354 	*       - The ICD must statically expose no other Vulkan symbol unless it is
7355 	*         linked with -Bsymbolic.
7356 	*       - Each dispatchable Vulkan handle created by the ICD must be
7357 	*         a pointer to a struct whose first member is VK_LOADER_DATA. The
7358 	*         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
7359 	*       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
7360 	*         vkDestroySurfaceKHR(). The ICD must be capable of working with
7361 	*         such loader-managed surfaces.
7362 	*
7363 	*    - Loader interface v2 differs from v1 in:
7364 	*       - The first ICD entrypoint called by the loader is
7365 	*         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
7366 	*         statically expose this entrypoint.
7367 	*
7368 	*    - Loader interface v3 differs from v2 in:
7369 	*        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
7370 	*          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
7371 	*          because the loader no longer does so.
7372 	*
	*    - Loader interface v4 differs from v3 in:
	*        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
	*/
7373 	*pSupportedVersion = MIN2(*pSupportedVersion, 4u);
7374 	return VK_SUCCESS;
7375 }
7376 
7377 VkResult radv_GetMemoryFdKHR(VkDevice _device,
7378 			     const VkMemoryGetFdInfoKHR *pGetFdInfo,
7379 			     int *pFD)
7380 {
7381 	RADV_FROM_HANDLE(radv_device, device, _device);
7382 	RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
7383 
7384 	assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
7385 
7386 	/* At the moment, we support only the below handle types. */
7387 	assert(pGetFdInfo->handleType ==
7388 	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
7389 	       pGetFdInfo->handleType ==
7390 	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
7391 
7392 	bool ret = radv_get_memory_fd(device, memory, pFD);
7393 	if (!ret)
7394 		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
7395 	return VK_SUCCESS;
7396 }
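/* A minimal usage sketch (illustrative only, assuming mem was allocated
 * with VkExportMemoryAllocateInfo): exporting a memory object as an
 * opaque fd; on success, ownership of the fd passes to the caller:
 *
 *	const VkMemoryGetFdInfoKHR get_fd = {
 *		.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *		.memory = mem,
 *		.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *	};
 *	int fd = -1;
 *	VkResult result = vkGetMemoryFdKHR(device, &get_fd, &fd);
 */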
7397 
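/* Note on the helpers below: ignore_flags is a mask of BO flag bits that
 * are allowed to differ; only the bits outside ignore_flags must match
 * between the requested flags and a memory type's flags for that type to
 * be reported as compatible.
 */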
7398 static uint32_t radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
7399                                                         enum radeon_bo_domain domains,
7400                                                         enum radeon_bo_flag flags,
7401                                                         enum radeon_bo_flag ignore_flags)
7402 {
7403 	/* Don't count GTT/CPU as relevant:
7404 	 *
7405 	 * - We're not fully consistent between the two.
7406 	 * - Sometimes VRAM gets VRAM|GTT.
7407 	 */
7408 	const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM |
7409 	                                               RADEON_DOMAIN_GDS |
7410 	                                               RADEON_DOMAIN_OA;
7411 	uint32_t bits = 0;
7412 	for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
7413 		if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
7414 			continue;
7415 
7416 		if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
7417 			continue;
7418 
7419 		bits |= 1u << i;
7420 	}
7421 
7422 	return bits;
7423 }
7424 
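/* First try requiring both NO_CPU_ACCESS and GTT_WC to match exactly; if
 * no memory type qualifies (e.g. for a BO imported with unusual flags),
 * retry while also allowing NO_CPU_ACCESS to differ.
 */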
7425 static uint32_t radv_compute_valid_memory_types(struct radv_physical_device *dev,
7426                                                 enum radeon_bo_domain domains,
7427                                                 enum radeon_bo_flag flags)
7428 {
7429 	enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
7430 	uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
7431 
7432 	if (!bits) {
7433 		ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
7434 		bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
7435 	}
7436 
7437 	return bits;
7438 }
7439 VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
7440 				       VkExternalMemoryHandleTypeFlagBits handleType,
7441 				       int fd,
7442 				       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
7443 {
7444 	RADV_FROM_HANDLE(radv_device, device, _device);
7445 
7446 	switch (handleType) {
7447 	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
7448 		enum radeon_bo_domain domains;
7449 		enum radeon_bo_flag flags;
7450 		if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
7451 			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7452 
7453 		pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
7454 		return VK_SUCCESS;
7455 	}
7456 	default:
7457 		/* The valid usage section for this function says:
7458 		 *
7459 		 *    "handleType must not be one of the handle types defined as
7460 		 *    opaque."
7461 		 *
7462 		 * So opaque handle types fall into the default "unsupported" case.
7463 		 */
7464 		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7465 	}
7466 }
7467 
7468 static VkResult radv_import_opaque_fd(struct radv_device *device,
7469                                       int fd,
7470                                       uint32_t *syncobj)
7471 {
7472 	uint32_t syncobj_handle = 0;
7473 	int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
7474 	if (ret != 0)
7475 		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7476 
7477 	if (*syncobj)
7478 		device->ws->destroy_syncobj(device->ws, *syncobj);
7479 
7480 	*syncobj = syncobj_handle;
7481 	close(fd);
7482 
7483 	return VK_SUCCESS;
7484 }
7485 
7486 static VkResult radv_import_sync_fd(struct radv_device *device,
7487                                     int fd,
7488                                     uint32_t *syncobj)
7489 {
7490 	/* If we create a syncobj we do it locally so that if we have an error, we don't
7491 	 * leave a syncobj in an undetermined state in the fence. */
7492 	uint32_t syncobj_handle = *syncobj;
7493 	if (!syncobj_handle) {
7494 		bool create_signaled = (fd == -1);
7495 
7496 		int ret = device->ws->create_syncobj(device->ws, create_signaled,
7497 						     &syncobj_handle);
7498 		if (ret) {
7499 			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
7500 		}
7501 	} else {
7502 		if (fd == -1)
7503 			device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
7504 	}
7505 
7506 	if (fd != -1) {
7507 		int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
7508 		if (ret)
7509 			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
7510 		close(fd);
7511 	}
7512 
7513 	*syncobj = syncobj_handle;
7514 
7515 	return VK_SUCCESS;
7516 }
7517 
7518 VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
7519 				   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
7520 {
7521 	RADV_FROM_HANDLE(radv_device, device, _device);
7522 	RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
7523 	VkResult result;
7524 	struct radv_semaphore_part *dst = NULL;
7525 	bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
7526 
7527 	if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
7528 		assert(!timeline);
7529 		dst = &sem->temporary;
7530 	} else {
7531 		dst = &sem->permanent;
7532 	}
7533 
7534 	uint32_t syncobj = (dst->kind == RADV_SEMAPHORE_SYNCOBJ ||
7535 	                    dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) ? dst->syncobj : 0;
7536 
7537 	switch(pImportSemaphoreFdInfo->handleType) {
7538 		case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
7539 			result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
7540 			break;
7541 		case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
7542 			assert(!timeline);
7543 			result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
7544 			break;
7545 		default:
7546 			unreachable("Unhandled semaphore handle type");
7547 	}
7548 
7549 	if (result == VK_SUCCESS) {
7550 		dst->syncobj = syncobj;
7551 		dst->kind = RADV_SEMAPHORE_SYNCOBJ;
7552 		if (timeline) {
7553 			dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
7554 			dst->timeline_syncobj.max_point = 0;
7555 		}
7556 	}
7557 
7558 	return result;
7559 }
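/* A minimal usage sketch (illustrative only): importing a sync fd into a
 * binary semaphore with temporary semantics; on success the driver takes
 * ownership of the fd (note the close(fd) in radv_import_sync_fd above):
 *
 *	const VkImportSemaphoreFdInfoKHR import = {
 *		.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
 *		.semaphore = sem,
 *		.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
 *		.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
 *		.fd = fd,
 *	};
 *	VkResult result = vkImportSemaphoreFdKHR(device, &import);
 */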
7560 
7561 VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
7562 				const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
7563 				int *pFd)
7564 {
7565 	RADV_FROM_HANDLE(radv_device, device, _device);
7566 	RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
7567 	int ret;
7568 	uint32_t syncobj_handle;
7569 
7570 	if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
7571 		assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
7572 		       sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
7573 		syncobj_handle = sem->temporary.syncobj;
7574 	} else {
7575 		assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
7576 		       sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
7577 		syncobj_handle = sem->permanent.syncobj;
7578 	}
7579 
7580 	switch(pGetFdInfo->handleType) {
7581 	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
7582 		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
7583 		if (ret)
7584 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7585 		break;
7586 	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
7587 		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
7588 		if (ret)
7589 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7590 
7591 		if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
7592 			radv_destroy_semaphore_part(device, &sem->temporary);
7593 		} else {
7594 			device->ws->reset_syncobj(device->ws, syncobj_handle);
7595 		}
7596 		break;
7597 	default:
7598 		unreachable("Unhandled semaphore handle type");
7599 	}
7600 
7601 	return VK_SUCCESS;
7602 }
7603 
7604 void radv_GetPhysicalDeviceExternalSemaphoreProperties(
7605 	VkPhysicalDevice                            physicalDevice,
7606 	const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
7607 	VkExternalSemaphoreProperties               *pExternalSemaphoreProperties)
7608 {
7609 	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
7610 	VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
7611 
7612 	if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
7613 	    pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
7614 		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
7615 		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
7616 		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
7617 			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
7618 	} else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
7619 		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
7620 		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
7621 		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
7622 
7623 	/* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
7624 	} else if (pdevice->rad_info.has_syncobj_wait_for_submit &&
7625 	           (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
7626 	            pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
7627 		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
7628 		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
7629 		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
7630 			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
7631 	} else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
7632 		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
7633 		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
7634 		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
7635 			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
7636 	} else {
7637 		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
7638 		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
7639 		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
7640 	}
7641 }
7642 
7643 VkResult radv_ImportFenceFdKHR(VkDevice _device,
7644 				   const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
7645 {
7646 	RADV_FROM_HANDLE(radv_device, device, _device);
7647 	RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
7648 	struct radv_fence_part *dst = NULL;
7649 	VkResult result;
7650 
7651 	if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
7652 		dst = &fence->temporary;
7653 	} else {
7654 		dst = &fence->permanent;
7655 	}
7656 
7657 	uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;
7658 
7659 	switch(pImportFenceFdInfo->handleType) {
7660 		case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
7661 			result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
7662 			break;
7663 		case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
7664 			result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
7665 			break;
7666 		default:
7667 			unreachable("Unhandled fence handle type");
7668 	}
7669 
7670 	if (result == VK_SUCCESS) {
7671 		dst->syncobj = syncobj;
7672 		dst->kind = RADV_FENCE_SYNCOBJ;
7673 	}
7674 
7675 	return result;
7676 }
7677 
7678 VkResult radv_GetFenceFdKHR(VkDevice _device,
7679 				const VkFenceGetFdInfoKHR *pGetFdInfo,
7680 				int *pFd)
7681 {
7682 	RADV_FROM_HANDLE(radv_device, device, _device);
7683 	RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
7684 	int ret;
7685 
7686 	struct radv_fence_part *part =
7687 		fence->temporary.kind != RADV_FENCE_NONE ?
7688 		&fence->temporary : &fence->permanent;
7689 
7690 	switch(pGetFdInfo->handleType) {
7691 	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
7692 		ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
7693 		if (ret)
7694 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7695 		break;
7696 	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
7697 		ret = device->ws->export_syncobj_to_sync_file(device->ws,
7698 							      part->syncobj, pFd);
7699 		if (ret)
7700 			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
7701 
7702 		if (part == &fence->temporary) {
7703 			radv_destroy_fence_part(device, part);
7704 		} else {
7705 			device->ws->reset_syncobj(device->ws, part->syncobj);
7706 		}
7707 		break;
7708 	default:
7709 		unreachable("Unhandled fence handle type");
7710 	}
7711 
7712 	return VK_SUCCESS;
7713 }
7714 
7715 void radv_GetPhysicalDeviceExternalFenceProperties(
7716 	VkPhysicalDevice                            physicalDevice,
7717 	const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
7718 	VkExternalFenceProperties               *pExternalFenceProperties)
7719 {
7720 	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
7721 
7722 	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
7723 	    (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
7724 	     pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
7725 		pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
7726 		pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
7727 		pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
7728 			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
7729 	} else {
7730 		pExternalFenceProperties->exportFromImportedHandleTypes = 0;
7731 		pExternalFenceProperties->compatibleHandleTypes = 0;
7732 		pExternalFenceProperties->externalFenceFeatures = 0;
7733 	}
7734 }
7735 
7736 VkResult
7737 radv_CreateDebugReportCallbackEXT(VkInstance _instance,
7738                                  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
7739                                  const VkAllocationCallbacks* pAllocator,
7740                                  VkDebugReportCallbackEXT* pCallback)
7741 {
7742 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
7743 	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
7744 	                                       pCreateInfo, pAllocator, &instance->alloc,
7745 	                                       pCallback);
7746 }
7747 
7748 void
7749 radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
7750                                   VkDebugReportCallbackEXT _callback,
7751                                   const VkAllocationCallbacks* pAllocator)
7752 {
7753 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
7754 	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
7755 	                                 _callback, pAllocator, &instance->alloc);
7756 }
7757 
7758 void
7759 radv_DebugReportMessageEXT(VkInstance _instance,
7760                           VkDebugReportFlagsEXT flags,
7761                           VkDebugReportObjectTypeEXT objectType,
7762                           uint64_t object,
7763                           size_t location,
7764                           int32_t messageCode,
7765                           const char* pLayerPrefix,
7766                           const char* pMessage)
7767 {
7768 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
7769 	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
7770 	                object, location, messageCode, pLayerPrefix, pMessage);
7771 }
7772 
7773 void
7774 radv_GetDeviceGroupPeerMemoryFeatures(
7775     VkDevice                                    device,
7776     uint32_t                                    heapIndex,
7777     uint32_t                                    localDeviceIndex,
7778     uint32_t                                    remoteDeviceIndex,
7779     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
7780 {
7781 	assert(localDeviceIndex == remoteDeviceIndex);
7782 
7783 	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
7784 	                       VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
7785 	                       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
7786 	                       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
7787 }
7788 
7789 static const VkTimeDomainEXT radv_time_domains[] = {
7790 	VK_TIME_DOMAIN_DEVICE_EXT,
7791 	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
7792 #ifdef CLOCK_MONOTONIC_RAW
7793 	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
7794 #endif
7795 };
7796 
7797 VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
7798 	VkPhysicalDevice                             physicalDevice,
7799 	uint32_t                                     *pTimeDomainCount,
7800 	VkTimeDomainEXT                              *pTimeDomains)
7801 {
7802 	int d;
7803 	VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
7804 
7805 	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
7806 		vk_outarray_append(&out, i) {
7807 			*i = radv_time_domains[d];
7808 		}
7809 	}
7810 
7811 	return vk_outarray_status(&out);
7812 }
7813 
7814 static uint64_t
7815 radv_clock_gettime(clockid_t clock_id)
7816 {
7817 	struct timespec current;
7818 	int ret;
7819 
7820 	ret = clock_gettime(clock_id, &current);
7821 #ifdef CLOCK_MONOTONIC_RAW
7822 	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
7823 		ret = clock_gettime(CLOCK_MONOTONIC, &current);
7824 #endif
7825 	if (ret < 0)
7826 		return 0;
7827 
7828 	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
7829 }
7830 
7831 VkResult radv_GetCalibratedTimestampsEXT(
7832 	VkDevice                                     _device,
7833 	uint32_t                                     timestampCount,
7834 	const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
7835 	uint64_t                                     *pTimestamps,
7836 	uint64_t                                     *pMaxDeviation)
7837 {
7838 	RADV_FROM_HANDLE(radv_device, device, _device);
7839 	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
7840 	int d;
7841 	uint64_t begin, end;
7842 	uint64_t max_clock_period = 0;
7843 
7844 #ifdef CLOCK_MONOTONIC_RAW
7845 	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
7846 #else
7847 	begin = radv_clock_gettime(CLOCK_MONOTONIC);
7848 #endif
7849 
7850 	for (d = 0; d < timestampCount; d++) {
7851 		switch (pTimestampInfos[d].timeDomain) {
7852 		case VK_TIME_DOMAIN_DEVICE_EXT:
7853 			pTimestamps[d] = device->ws->query_value(device->ws,
7854 								 RADEON_TIMESTAMP);
7855 			uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
7856 			max_clock_period = MAX2(max_clock_period, device_period);
7857 			break;
7858 		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
7859 			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
7860 			max_clock_period = MAX2(max_clock_period, 1);
7861 			break;
7862 
7863 #ifdef CLOCK_MONOTONIC_RAW
7864 		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
7865 			pTimestamps[d] = begin;
7866 			break;
7867 #endif
7868 		default:
7869 			pTimestamps[d] = 0;
7870 			break;
7871 		}
7872 	}
7873 
7874 #ifdef CLOCK_MONOTONIC_RAW
7875 	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
7876 #else
7877 	end = radv_clock_gettime(CLOCK_MONOTONIC);
7878 #endif
7879 
7880         /*
7881          * The maximum deviation is the sum of the interval over which we
7882          * perform the sampling and the maximum period of any sampled
7883          * clock. That's because the maximum skew between any two sampled
7884          * clock edges is when the sampled clock with the largest period is
7885          * sampled at the end of that period but right at the beginning of the
7886          * sampling interval and some other clock is sampled right at the
7887          * beginning of its sampling period and right at the end of the
7888          * sampling interval. Let's assume the GPU has the longest clock
7889          * period and that the application is sampling GPU and monotonic:
7890          *
7891          *                               s                 e
7892          *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
7893          *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
7894          *
7895          *                               g
7896          *		  0         1         2         3
7897          *	GPU       -----_____-----_____-----_____-----_____
7898          *
7899          *                                                m
7900          *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
7901          *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
7902          *
7903          *	Interval                     <----------------->
7904          *	Deviation           <-------------------------->
7905          *
7906          *		s  = read(raw)       2
7907          *		g  = read(GPU)       1
7908          *		m  = read(monotonic) 2
7909          *		e  = read(raw)       b
7910          *
7911          * We round the sample interval up by one tick to cover sampling error
7912          * in the interval clock
7913          */
7914 
7915 	uint64_t sample_interval = end - begin + 1;
7916 
7917 	*pMaxDeviation = sample_interval + max_clock_period;
7918 
7919 	return VK_SUCCESS;
7920 }
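/* Worked example (illustrative numbers): with a 25000 kHz reference clock
 * the device tick period is DIV_ROUND_UP(1000000, 25000) = 40 ns. If the
 * sampling loop spanned 300 ns of CLOCK_MONOTONIC_RAW time, the reported
 * deviation is (300 + 1) + 40 = 341 ns.
 */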
7921 
7922 void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
7923     VkPhysicalDevice                            physicalDevice,
7924     VkSampleCountFlagBits                       samples,
7925     VkMultisamplePropertiesEXT*                 pMultisampleProperties)
7926 {
7927 	if (samples & (VK_SAMPLE_COUNT_2_BIT |
7928 		       VK_SAMPLE_COUNT_4_BIT |
7929 		       VK_SAMPLE_COUNT_8_BIT)) {
7930 		pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
7931 	} else {
7932 		pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
7933 	}
7934 }
7935 
7936 VkResult radv_CreatePrivateDataSlotEXT(
7937     VkDevice                                    _device,
7938     const VkPrivateDataSlotCreateInfoEXT*       pCreateInfo,
7939     const VkAllocationCallbacks*                pAllocator,
7940     VkPrivateDataSlotEXT*                       pPrivateDataSlot)
7941 {
7942 	RADV_FROM_HANDLE(radv_device, device, _device);
7943 	return vk_private_data_slot_create(&device->vk, pCreateInfo, pAllocator,
7944 					   pPrivateDataSlot);
7945 }
7946 
7947 void radv_DestroyPrivateDataSlotEXT(
7948     VkDevice                                    _device,
7949     VkPrivateDataSlotEXT                        privateDataSlot,
7950     const VkAllocationCallbacks*                pAllocator)
7951 {
7952 	RADV_FROM_HANDLE(radv_device, device, _device);
7953 	vk_private_data_slot_destroy(&device->vk, privateDataSlot, pAllocator);
7954 }
7955 
7956 VkResult radv_SetPrivateDataEXT(
7957     VkDevice                                    _device,
7958     VkObjectType                                objectType,
7959     uint64_t                                    objectHandle,
7960     VkPrivateDataSlotEXT                        privateDataSlot,
7961     uint64_t                                    data)
7962 {
7963 	RADV_FROM_HANDLE(radv_device, device, _device);
7964 	return vk_object_base_set_private_data(&device->vk, objectType,
7965 					       objectHandle, privateDataSlot,
7966 					       data);
7967 }
7968 
7969 void radv_GetPrivateDataEXT(
7970     VkDevice                                    _device,
7971     VkObjectType                                objectType,
7972     uint64_t                                    objectHandle,
7973     VkPrivateDataSlotEXT                        privateDataSlot,
7974     uint64_t*                                   pData)
7975 {
7976 	RADV_FROM_HANDLE(radv_device, device, _device);
7977 	vk_object_base_get_private_data(&device->vk, objectType, objectHandle,
7978 					privateDataSlot, pData);
7979 }
7980