1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #include "tu_private.h"
29 #include "tu_cs.h"
30 #include "git_sha1.h"
31
32 #include <fcntl.h>
33 #include <poll.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <sys/sysinfo.h>
37 #include <unistd.h>
38
39 #include "util/debug.h"
40 #include "util/disk_cache.h"
41 #include "util/driconf.h"
42 #include "util/os_misc.h"
43 #include "util/u_atomic.h"
44 #include "vk_format.h"
45 #include "vk_util.h"
46
47 /* for fd_get_driver/device_uuid() */
48 #include "freedreno/common/freedreno_uuid.h"
49
50 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
51 defined(VK_USE_PLATFORM_XCB_KHR) || \
52 defined(VK_USE_PLATFORM_XLIB_KHR) || \
53 defined(VK_USE_PLATFORM_DISPLAY_KHR)
54 #define TU_HAS_SURFACE 1
55 #else
56 #define TU_HAS_SURFACE 0
57 #endif
58
59
60 static int
tu_device_get_cache_uuid(uint16_t family,void * uuid)61 tu_device_get_cache_uuid(uint16_t family, void *uuid)
62 {
63 uint32_t mesa_timestamp;
64 uint16_t f = family;
65 memset(uuid, 0, VK_UUID_SIZE);
66 if (!disk_cache_get_function_timestamp(tu_device_get_cache_uuid,
67 &mesa_timestamp))
68 return -1;
69
70 memcpy(uuid, &mesa_timestamp, 4);
71 memcpy((char *) uuid + 4, &f, 2);
72 snprintf((char *) uuid + 6, VK_UUID_SIZE - 10, "tu");
73 return 0;
74 }
75
76 #define TU_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
77
78 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t * pApiVersion)79 tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
80 {
81 *pApiVersion = TU_API_VERSION;
82 return VK_SUCCESS;
83 }
84
/* Instance-level extensions advertised by turnip.  Surface-related entries
 * are gated on TU_HAS_SURFACE / the WSI platforms compiled in; everything
 * else is unconditional.
 */
static const struct vk_instance_extension_table tu_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .KHR_surface = TU_HAS_SURFACE,
   .KHR_get_surface_capabilities2 = TU_HAS_SURFACE,
   .EXT_debug_report = true,
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
#endif
};
113
/* Fill *ext with the device-level extensions supported for this physical
 * device.  Most entries are unconditional; a few depend on a6xx hardware
 * capability bits, debug flags, or compile-time platform support.
 */
static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      /* 16-bit SSBO access only where the hw supports it. */
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_imageless_framebuffer = true,
      .KHR_incremental_present = TU_HAS_SURFACE,
      .KHR_image_format_list = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_multiview = true,
      /* Performance queries are opt-in via TU_DEBUG=perfc. */
      .KHR_performance_query = device->instance->debug_flags & TU_DEBUG_PERFC,
      .KHR_pipeline_executable_properties = true,
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_swapchain = TU_HAS_SURFACE,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_driver_properties = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_buffer_device_address = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .KHR_shader_non_semantic_info = true,
#ifndef TU_USE_KGSL
      /* Timeline semaphores are unavailable on the kgsl backend. */
      .KHR_timeline_semaphore = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_external_memory_dma_buf = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = true,
      .EXT_transform_feedback = true,
      .EXT_4444_formats = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_indexing = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_provoking_vertex = true,
      .EXT_line_rasterization = true,
      .EXT_subgroup_size_control = true,
      .EXT_image_robustness = true,
#ifndef TU_USE_KGSL
      .EXT_physical_device_drm = true,
#endif
      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
#ifdef ANDROID
      .ANDROID_native_buffer = true,
#endif
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .VALVE_mutable_descriptor_type = true,
   };
}
216
/* Finish initializing a tu_physical_device whose dev_id (and fds/gmem_size,
 * set by the enumeration code) are already filled in: derive the marketing
 * name, hw info, cache UUIDs, disk cache, and the vk_physical_device base.
 * On failure, everything allocated here is unwound via the goto chain.
 */
VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   /* Names of the form "FD630" become "Turnip Adreno (TM) 630"; anything
    * else is used verbatim.
    */
   const char *fd_name = fd_dev_name(&device->dev_id);
   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s", &fd_name[2]);
   } else {
      device->name = vk_strdup(&instance->vk.alloc, fd_name,
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   const struct fd_dev_info *info = fd_dev_info(&device->dev_id);
   if (!info) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   /* Only a6xx-generation GPUs are supported by turnip. */
   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
      device->info = info;
      /* CCU color buffers live after the depth CCU space in bypass mode,
       * and at the tail end of GMEM in gmem mode.
       */
      device->ccu_offset_bypass = device->info->num_ccu * A6XX_CCU_DEPTH_SIZE;
      device->ccu_offset_gmem = (device->gmem_size -
         device->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
      break;
   default:
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   if (tu_device_get_cache_uuid(fd_dev_gpu_id(&device->dev_id), device->cache_uuid)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "cannot generate UUID");
      goto fail_free_name;
   }

   /* The gpu id is already embedded in the uuid so we just pass "tu"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, 0);

   fd_get_driver_uuid(device->driver_uuid);
   fd_get_device_uuid(device->device_uuid, &device->dev_id);

   struct vk_device_extension_table supported_extensions;
   get_device_extensions(device, &supported_extensions);

   /* Driver entrypoints take precedence; WSI fills in the rest. */
   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    &supported_extensions,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_free_cache;

#if TU_HAS_SURFACE
   result = tu_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(instance, result, "WSI init failure");
      vk_physical_device_finish(&device->vk);
      goto fail_free_cache;
   }
#endif

   return VK_SUCCESS;

fail_free_cache:
   disk_cache_destroy(device->disk_cache);
fail_free_name:
   vk_free(&instance->vk.alloc, (void *)device->name);
   return result;
}
304
/* Tear down everything tu_physical_device_init (and the enumeration code
 * that opened the fds) set up, in reverse order of initialization.
 */
static void
tu_physical_device_finish(struct tu_physical_device *device)
{
#if TU_HAS_SURFACE
   tu_wsi_finish(device);
#endif

   disk_cache_destroy(device->disk_cache);
   close(device->local_fd);
   /* master_fd is only opened when a DRM master node was available. */
   if (device->master_fd != -1)
      close(device->master_fd);

   vk_free(&device->instance->vk.alloc, (void *)device->name);

   vk_physical_device_finish(&device->vk);
}
321
/* Names accepted in the TU_DEBUG environment variable, mapped to their
 * TU_DEBUG_* flag bits.  The list is NULL-terminated for
 * parse_debug_string() / tu_get_debug_option_name().
 */
static const struct debug_control tu_debug_options[] = {
   { "startup", TU_DEBUG_STARTUP },
   { "nir", TU_DEBUG_NIR },
   { "nobin", TU_DEBUG_NOBIN },
   { "sysmem", TU_DEBUG_SYSMEM },
   { "gmem", TU_DEBUG_GMEM },
   { "forcebin", TU_DEBUG_FORCEBIN },
   { "noubwc", TU_DEBUG_NOUBWC },
   { "nomultipos", TU_DEBUG_NOMULTIPOS },
   { "nolrz", TU_DEBUG_NOLRZ },
   { "perfc", TU_DEBUG_PERFC },
   { "flushall", TU_DEBUG_FLUSHALL },
   { "syncdraw", TU_DEBUG_SYNCDRAW },
   { "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD },
   { "rast_order", TU_DEBUG_RAST_ORDER },
   { NULL, 0 }
};
339
340 const char *
tu_get_debug_option_name(int id)341 tu_get_debug_option_name(int id)
342 {
343 assert(id < ARRAY_SIZE(tu_debug_options) - 1);
344 return tu_debug_options[id].string;
345 }
346
/* driconf options turnip understands, parsed from the system/user drirc
 * files by tu_init_dri_options().
 */
static const driOptionDescription tu_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
   DRI_CONF_SECTION_END
};
360
/* Parse drirc config files for the "turnip" driver (keyed on the app/engine
 * name and version from the instance) and fold the vk_dont_care_as_load
 * option into the instance debug flags.
 */
static void
tu_init_dri_options(struct tu_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
                      ARRAY_SIZE(tu_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   if (driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load"))
      instance->debug_flags |= TU_DEBUG_DONT_CARE_AS_LOAD;
}
373
/* vkCreateInstance: allocate and initialize a tu_instance.  Physical-device
 * enumeration is deferred (physical_device_count = -1 means "not yet
 * enumerated"; see tu_EnumeratePhysicalDevices).
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkInstance *pInstance)
{
   struct tu_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Driver entrypoints take precedence; WSI fills in the rest. */
   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &tu_instance_extensions_supported,
                             &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->physical_device_count = -1;

   instance->debug_flags =
      parse_debug_string(os_get_option("TU_DEBUG"), tu_debug_options);

#ifdef DEBUG
   /* Enable startup debugging by default on debug drivers. You almost always
    * want to see your startup failures in that case, and it's hard to set
    * this env var on android.
    */
   instance->debug_flags |= TU_DEBUG_STARTUP;
#endif

   if (instance->debug_flags & TU_DEBUG_STARTUP)
      mesa_logi("Created an instance");

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   tu_init_dri_options(instance);

   *pInstance = tu_instance_to_handle(instance);

#ifdef HAVE_PERFETTO
   tu_perfetto_init();
#endif

   return VK_SUCCESS;
}
436
/* vkDestroyInstance: finish any enumerated physical devices, then tear down
 * dri option state and the base vk_instance.  A NULL handle is a no-op per
 * the Vulkan spec.
 */
VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,
                   const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);

   if (!instance)
      return;

   /* physical_device_count stays -1 if enumeration never ran, so this
    * signed loop correctly does nothing in that case.
    */
   for (int i = 0; i < instance->physical_device_count; ++i) {
      tu_physical_device_finish(instance->physical_devices + i);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}
458
/* vkEnumeratePhysicalDevices: lazily enumerate devices on first call, then
 * report them through the vk_outarray helper (which handles the standard
 * count-query / fill / VK_INCOMPLETE protocol).
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDevices(VkInstance _instance,
                            uint32_t *pPhysicalDeviceCount,
                            VkPhysicalDevice *pPhysicalDevices)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
                          pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result;

   if (instance->physical_device_count < 0) {
      /* VK_ERROR_INCOMPATIBLE_DRIVER just means "no usable GPU" and is
       * reported as an empty list rather than an error.
       */
      result = tu_enumerate_devices(instance);
      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   /* NOTE(review): uint32_t i vs int physical_device_count is a mixed-sign
    * comparison; assumes tu_enumerate_devices always leaves the count >= 0
    * after the block above — appears to hold, but verify in that function.
    */
   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
      vk_outarray_append_typed(VkPhysicalDevice, &out, p)
      {
         *p = tu_physical_device_to_handle(instance->physical_devices + i);
      }
   }

   return vk_outarray_status(&out);
}
485
/* vkEnumeratePhysicalDeviceGroups: each physical device forms its own
 * single-member group (no multi-GPU linking), using the same lazy
 * enumeration and outarray protocol as tu_EnumeratePhysicalDevices.
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDeviceGroups(
   VkInstance _instance,
   uint32_t *pPhysicalDeviceGroupCount,
   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
                          pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);
   VkResult result;

   if (instance->physical_device_count < 0) {
      result = tu_enumerate_devices(instance);
      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         p->physicalDevices[0] =
            tu_physical_device_to_handle(instance->physical_devices + i);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}
516
/* Fill in the Vulkan 1.1 core feature struct.  Fields are assigned
 * individually (not via a struct literal) so the caller's sType/pNext are
 * preserved.
 */
static void
tu_get_physical_device_features_1_1(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan11Features *features)
{
   /* 16-bit SSBO access tracks the hw capability bit. */
   features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess = false;
   features->storagePushConstant16 = false;
   features->storageInputOutput16 = false;
   features->multiview = true;
   features->multiviewGeometryShader = false;
   features->multiviewTessellationShader = false;
   features->variablePointersStorageBuffer = true;
   features->variablePointers = true;
   features->protectedMemory = false;
   features->samplerYcbcrConversion = true;
   features->shaderDrawParameters = true;
}
534
/* Fill in the Vulkan 1.2 core feature struct.  Per-field assignment
 * preserves the caller's sType/pNext.
 */
static void
tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan12Features *features)
{
   features->samplerMirrorClampToEdge = true;
   features->drawIndirectCount = true;
   features->storageBuffer8BitAccess = false;
   features->uniformAndStorageBuffer8BitAccess = false;
   features->storagePushConstant8 = false;
   features->shaderBufferInt64Atomics = false;
   features->shaderSharedInt64Atomics = false;
   features->shaderFloat16 = true;
   features->shaderInt8 = false;

   /* Descriptor indexing: everything except input-attachment indexing. */
   features->descriptorIndexing = true;
   features->shaderInputAttachmentArrayDynamicIndexing = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing = true;
   features->shaderUniformBufferArrayNonUniformIndexing = true;
   features->shaderSampledImageArrayNonUniformIndexing = true;
   features->shaderStorageBufferArrayNonUniformIndexing = true;
   features->shaderStorageImageArrayNonUniformIndexing = true;
   features->shaderInputAttachmentArrayNonUniformIndexing = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   features->descriptorBindingUniformBufferUpdateAfterBind = true;
   features->descriptorBindingSampledImageUpdateAfterBind = true;
   features->descriptorBindingStorageImageUpdateAfterBind = true;
   features->descriptorBindingStorageBufferUpdateAfterBind = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending = true;
   features->descriptorBindingPartiallyBound = true;
   features->descriptorBindingVariableDescriptorCount = true;
   features->runtimeDescriptorArray = true;

   features->samplerFilterMinmax = true;
   features->scalarBlockLayout = true;
   features->imagelessFramebuffer = true;
   features->uniformBufferStandardLayout = true;
   features->shaderSubgroupExtendedTypes = true;
   features->separateDepthStencilLayouts = true;
   features->hostQueryReset = true;
   features->timelineSemaphore = true;
   features->bufferDeviceAddress = true;
   features->bufferDeviceAddressCaptureReplay = false;
   features->bufferDeviceAddressMultiDevice = false;
   features->vulkanMemoryModel = true;
   features->vulkanMemoryModelDeviceScope = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex = true;
   features->shaderOutputLayer = true;
   features->subgroupBroadcastDynamicId = true;
}
589
/* Fill in the Vulkan 1.3 core feature struct.  Per-field assignment
 * preserves the caller's sType/pNext.
 */
static void
tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan13Features *features)
{
   features->robustImageAccess = true;
   features->inlineUniformBlock = false;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
   features->pipelineCreationCacheControl = false;
   features->privateData = true;
   features->shaderDemoteToHelperInvocation = true;
   features->shaderTerminateInvocation = true;
   features->subgroupSizeControl = true;
   features->computeFullSubgroups = true;
   features->synchronization2 = false;
   features->textureCompressionASTC_HDR = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering = false;
   features->shaderIntegerDotProduct = true;
   features->maintenance4 = false;
}
610
/* vkGetPhysicalDeviceFeatures2: fill the core 1.0 features, then walk the
 * pNext chain.  Core 1.1/1.2/1.3 promoted-extension structs are handled by
 * the vk_get_physical_device_core_*_feature_ext helpers against locally
 * filled core structs; everything else goes through the switch below.
 * Unknown structs are skipped, as required by the spec.
 */
void
tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                              VkPhysicalDeviceFeatures2 *pFeatures)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   pFeatures->features = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = false,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = true,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = false,
      .shaderInt64 = false,
      .shaderInt16 = true,
      .sparseBinding = false,
      .variableMultisampleRate = true,
      .inheritedQueries = true,
   };

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   tu_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   tu_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   tu_get_physical_device_features_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      /* Structs promoted to core 1.1/1.2/1.3 are answered from the core
       * structs filled above.
       */
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *) ext;
         features->transformFeedback = true;
         features->geometryStreams = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features = (void *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *) ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = false;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         feature->performanceCounterQueryPools = true;
         feature->performanceCounterMultipleQueryPools = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
         VkPhysicalDeviceShaderFloat16Int8Features *features =
            (VkPhysicalDeviceShaderFloat16Int8Features *) ext;
         features->shaderFloat16 = true;
         features->shaderInt8 = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
         VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = (void *)ext;
         features->scalarBlockLayout = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
         VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
            (VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
         features->timelineSemaphore = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
         features->mutableDescriptorType = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = true;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = false;
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM: {
         VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *features =
            (VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *)ext;
         features->rasterizationOrderColorAttachmentAccess = true;
         features->rasterizationOrderDepthAttachmentAccess = true;
         features->rasterizationOrderStencilAttachmentAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
         features->depthClipControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }

      default:
         break;
      }
   }
}
843
844
/* Fill in the Vulkan 1.1 core properties struct (UUIDs, subgroup and
 * multiview limits).
 */
static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   /* No LUID support (not a Windows driver). */
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   /* Subgroup size is hardcoded to 128 here; non-compute stages and quad
    * ops are only advertised when the hw can report the fiber id.
    */
   p->subgroupSize = 128;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Make sure everything is addressable by a signed 32-bit int, and
    * our largest descriptors are 96 bytes.
    */
   p->maxPerSetDescriptors = (1ull << 31) / 96;
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;

}
884
885
/* I have no idea what the maximum size is, but the hardware supports very
 * large numbers of descriptors (at least 2^16). This limit is based on
 * CP_LOAD_STATE6, which has a 28-bit field for the DWORD offset, so that
 * we don't have to think about what to do if that overflows, but really
 * nothing is likely to get close to this.
 */
static const size_t max_descriptor_set_size = (1 << 28) / A6XX_TEX_CONST_DWORDS;
/* MSAA counts advertised throughout the limits below: 1x, 2x and 4x. */
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
895
896 static void
tu_get_physical_device_properties_1_2(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan12Properties * p)897 tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
898 VkPhysicalDeviceVulkan12Properties *p)
899 {
900 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
901
902 p->driverID = VK_DRIVER_ID_MESA_TURNIP;
903 memset(p->driverName, 0, sizeof(p->driverName));
904 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
905 "turnip Mesa driver");
906 memset(p->driverInfo, 0, sizeof(p->driverInfo));
907 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
908 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
909 p->conformanceVersion = (VkConformanceVersionKHR) {
910 .major = 1,
911 .minor = 2,
912 .subminor = 7,
913 .patch = 1,
914 };
915
916 p->denormBehaviorIndependence =
917 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
918 p->roundingModeIndependence =
919 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
920
921 p->shaderDenormFlushToZeroFloat16 = true;
922 p->shaderDenormPreserveFloat16 = false;
923 p->shaderRoundingModeRTEFloat16 = true;
924 p->shaderRoundingModeRTZFloat16 = false;
925 p->shaderSignedZeroInfNanPreserveFloat16 = true;
926
927 p->shaderDenormFlushToZeroFloat32 = true;
928 p->shaderDenormPreserveFloat32 = false;
929 p->shaderRoundingModeRTEFloat32 = true;
930 p->shaderRoundingModeRTZFloat32 = false;
931 p->shaderSignedZeroInfNanPreserveFloat32 = true;
932
933 p->shaderDenormFlushToZeroFloat64 = false;
934 p->shaderDenormPreserveFloat64 = false;
935 p->shaderRoundingModeRTEFloat64 = false;
936 p->shaderRoundingModeRTZFloat64 = false;
937 p->shaderSignedZeroInfNanPreserveFloat64 = false;
938
939 p->shaderUniformBufferArrayNonUniformIndexingNative = true;
940 p->shaderSampledImageArrayNonUniformIndexingNative = true;
941 p->shaderStorageBufferArrayNonUniformIndexingNative = true;
942 p->shaderStorageImageArrayNonUniformIndexingNative = true;
943 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
944 p->robustBufferAccessUpdateAfterBind = false;
945 p->quadDivergentImplicitLod = false;
946
947 p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
948 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
949 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
950 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
951 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
952 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
953 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
954 p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
955 p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
956 p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
957 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
958 p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
959 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
960 p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
961 p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
962 p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
963
964 p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
965 p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
966 p->independentResolveNone = false;
967 p->independentResolve = false;
968
969 p->filterMinmaxSingleComponentFormats = true;
970 p->filterMinmaxImageComponentMapping = true;
971
972 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
973
974 p->framebufferIntegerColorSampleCounts = sample_counts;
975 }
976
977 static void
tu_get_physical_device_properties_1_3(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan13Properties * p)978 tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
979 VkPhysicalDeviceVulkan13Properties *p)
980 {
981 /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
982 p->minSubgroupSize = 64; /* threadsize_base */
983 p->maxSubgroupSize = 128; /* threadsize_base * 2 */
984 p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
985 p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
986
987 /* VK_EXT_inline_uniform_block is not implemented */
988 p->maxInlineUniformBlockSize = 0;
989 p->maxPerStageDescriptorInlineUniformBlocks = 0;
990 p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 0;
991 p->maxDescriptorSetInlineUniformBlocks = 0;
992 p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 0;
993 p->maxInlineUniformTotalSize = 0;
994
995 p->integerDotProduct8BitUnsignedAccelerated = false;
996 p->integerDotProduct8BitSignedAccelerated = false;
997 p->integerDotProduct8BitMixedSignednessAccelerated = false;
998 p->integerDotProduct4x8BitPackedUnsignedAccelerated =
999 pdevice->info->a6xx.has_dp2acc;
1000 /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
1001 p->integerDotProduct4x8BitPackedSignedAccelerated = false;
1002 p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
1003 pdevice->info->a6xx.has_dp2acc;
1004 p->integerDotProduct16BitUnsignedAccelerated = false;
1005 p->integerDotProduct16BitSignedAccelerated = false;
1006 p->integerDotProduct16BitMixedSignednessAccelerated = false;
1007 p->integerDotProduct32BitUnsignedAccelerated = false;
1008 p->integerDotProduct32BitSignedAccelerated = false;
1009 p->integerDotProduct32BitMixedSignednessAccelerated = false;
1010 p->integerDotProduct64BitUnsignedAccelerated = false;
1011 p->integerDotProduct64BitSignedAccelerated = false;
1012 p->integerDotProduct64BitMixedSignednessAccelerated = false;
1013 p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
1014 p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
1015 p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
1016 p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
1017 pdevice->info->a6xx.has_dp2acc;
1018 /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
1019 p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
1020 p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
1021 pdevice->info->a6xx.has_dp2acc;
1022 p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
1023 p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
1024 p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1025 p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1026 p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1027 p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1028 p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1029 p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1030 p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1031
1032 p->storageTexelBufferOffsetAlignmentBytes = 64;
1033 p->storageTexelBufferOffsetSingleTexelAlignment = false;
1034 p->uniformTexelBufferOffsetAlignmentBytes = 64;
1035 p->uniformTexelBufferOffsetSingleTexelAlignment = false;
1036
1037 /* TODO: find out the limit */
1038 p->maxBufferSize = 0;
1039 }
1040
/* Implementation of vkGetPhysicalDeviceProperties2(): fills in the core
 * VkPhysicalDeviceProperties (including the big VkPhysicalDeviceLimits
 * table), the core 1.1/1.2/1.3 property structs via the shared vk_*
 * helpers, and any recognized extension property structs chained on
 * pProperties->pNext.
 */
VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceProperties2 *pProperties)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE,
      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 64, /* A cache line */
      .sparseAddressSpaceSize = 0,  /* sparse binding is not supported */
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = MAX_RTS,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = MAX_RTS,
      .maxVertexInputAttributes = 32,
      .maxVertexInputBindings = 32,
      .maxVertexInputAttributeOffset = 4095,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 124,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2,
      .maxComputeSharedMemorySize = 32768,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 2048,
      .maxComputeWorkGroupSize = { 1024, 1024, 1024 },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 64,
      .minUniformBufferOffsetAlignment = 64,
      .minStorageBufferOffsetAlignment = 64,
      .minTexelOffset = -16,
      .maxTexelOffset = 15,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 1, 4092 },
      .lineWidthRange = { 1.0, 1.0 },
      .pointSizeGranularity = 0.0625,
      .lineWidthGranularity = 0.0,
      .strictLines = true,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,
   };

   pProperties->properties = (VkPhysicalDeviceProperties) {
      .apiVersion = TU_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x5143, /* Qualcomm */
      .deviceID = pdevice->dev_id.chip_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   strcpy(pProperties->properties.deviceName, pdevice->name);
   memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   /* Fill the core 1.1/1.2/1.3 property structs once; the vk_get_* helpers
    * in the loop below copy the relevant fields into any matching core or
    * promoted-extension struct found on the pNext chain.
    */
   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   tu_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   tu_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   tu_get_physical_device_properties_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pProperties->pNext)
   {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      /* Extension property structs not covered by a core version. */
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;

         properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = true;
         properties->transformFeedbackStreamsLinesTriangles = true;
         properties->transformFeedbackRasterizationStreamSelect = true;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         /* Only advertise sample-location support when the extension is
          * actually enabled for this device.
          */
         properties->sampleLocationSampleCounts = 0;
         if (pdevice->vk.supported_extensions.EXT_sample_locations) {
            properties->sampleLocationSampleCounts =
               VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
         }
         properties->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         props->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (void *)ext;
         props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         properties->allowCommandBufferQueryCopies = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *props = (void *)ext;
         /* see write_buffer_descriptor() */
         props->robustStorageBufferAccessSizeAlignment = 4;
         /* see write_ubo_descriptor() */
         props->robustUniformBufferAccessSizeAlignment = 16;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         properties->provokingVertexModePerPipeline = true;
         properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props =
            (VkPhysicalDeviceDrmPropertiesEXT *)ext;
         /* Report the DRM primary (master) and render node numbers probed
          * at physical-device creation.
          */
         props->hasPrimary = pdevice->has_master;
         props->primaryMajor = pdevice->master_major;
         props->primaryMinor = pdevice->master_minor;

         props->hasRender = pdevice->has_local;
         props->renderMajor = pdevice->local_major;
         props->renderMinor = pdevice->local_minor;
         break;
      }

      default:
         break;
      }
   }
}
1288
/* The single queue family: one queue supporting graphics, compute and
 * transfer, with 48 valid timestamp bits and per-texel image transfer
 * granularity.
 */
static const VkQueueFamilyProperties tu_queue_family_properties = {
   .queueFlags =
      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
   .timestampValidBits = 48,
   .minImageTransferGranularity = { 1, 1, 1 },
};
1296
/* Implementation of vkGetPhysicalDeviceQueueFamilyProperties2(): reports the
 * single queue family. Extension structs chained on each entry's pNext are
 * left untouched.
 */
VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   /* vk_outarray handles both the count query (pQueueFamilyProperties ==
    * NULL) and the clamped fill case.
    */
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
   {
      p->queueFamilyProperties = tu_queue_family_properties;
   }
}
1311
/* Return the size of the single Vulkan memory heap, derived from system RAM.
 *
 * We don't want to burn too much ram with the GPU: if the system has 4GiB
 * or less we use at most half of it, otherwise 3/4.
 */
uint64_t
tu_get_system_heap_size(void)
{
   struct sysinfo info;
   sysinfo(&info);

   /* totalram is in mem_unit-sized blocks. */
   uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;

   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   return available_ram;
}
1331
1332 static VkDeviceSize
tu_get_budget_memory(struct tu_physical_device * physical_device)1333 tu_get_budget_memory(struct tu_physical_device *physical_device)
1334 {
1335 uint64_t heap_size = physical_device->heap.size;
1336 uint64_t heap_used = physical_device->heap.used;
1337 uint64_t sys_available;
1338 ASSERTED bool has_available_memory =
1339 os_get_available_system_memory(&sys_available);
1340 assert(has_available_memory);
1341
1342 /*
1343 * Let's not incite the app to starve the system: report at most 90% of
1344 * available system memory.
1345 */
1346 uint64_t heap_available = sys_available * 9 / 10;
1347 return MIN2(heap_size, heap_used + heap_available);
1348 }
1349
1350 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,VkPhysicalDeviceMemoryProperties2 * props2)1351 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1352 VkPhysicalDeviceMemoryProperties2 *props2)
1353 {
1354 TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1355
1356 VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1357 props->memoryHeapCount = 1;
1358 props->memoryHeaps[0].size = physical_device->heap.size;
1359 props->memoryHeaps[0].flags = physical_device->heap.flags;
1360
1361 props->memoryTypeCount = 1;
1362 props->memoryTypes[0].propertyFlags =
1363 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1364 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1365 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1366 props->memoryTypes[0].heapIndex = 0;
1367
1368 vk_foreach_struct(ext, props2->pNext)
1369 {
1370 switch (ext->sType) {
1371 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1372 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1373 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1374 memory_budget_props->heapUsage[0] = physical_device->heap.used;
1375 memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1376
1377 /* The heapBudget and heapUsage values must be zero for array elements
1378 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1379 */
1380 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1381 memory_budget_props->heapBudget[i] = 0u;
1382 memory_budget_props->heapUsage[i] = 0u;
1383 }
1384 break;
1385 }
1386 default:
1387 break;
1388 }
1389 }
1390 }
1391
/* Initialize one tu_queue: set up the common vk_queue state and create the
 * kernel submitqueue the queue will submit to.
 *
 * Returns VK_SUCCESS, or propagates the common-init failure, or
 * VK_ERROR_INITIALIZATION_FAILED if the kernel submitqueue can't be
 * created.
 */
static VkResult
tu_queue_init(struct tu_device *device,
              struct tu_queue *queue,
              int idx,
              const VkDeviceQueueCreateInfo *create_info)
{
   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   queue->device = device;
#ifndef TU_USE_KGSL
   /* msm path: submissions go through our driver_submit hook; the kgsl
    * build does not install it.
    */
   queue->vk.driver_submit = tu_queue_submit;
#endif

   /* Second argument 0 — presumably the submitqueue priority/flags; confirm
    * against tu_drm_submitqueue_new().
    */
   int ret = tu_drm_submitqueue_new(device, 0, &queue->msm_queue_id);
   if (ret)
      return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
                               "submitqueue create failed");

   /* No in-flight submission yet; fence is a file descriptor, -1 == none
    * (see tu_queue_finish()).
    */
   queue->fence = -1;

   return VK_SUCCESS;
}
1416
/* Tear down a queue initialized by tu_queue_init(): release the common
 * vk_queue state, close the pending fence fd (if any), and close the
 * kernel submitqueue.
 */
static void
tu_queue_finish(struct tu_queue *queue)
{
   vk_queue_finish(&queue->vk);
   if (queue->fence >= 0)
      close(queue->fence);
   tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
}
1425
/* Convert GPU timestamp ticks to nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer.
 *
 * TODO we should probably query this value from kernel..
 */
uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
{
   const uint64_t freq_hz = 19200000;

   /* Split the conversion into whole seconds plus remainder so we neither
    * truncate 1000000000/19200000 down to 52 (the old code lost ~0.16% and
    * disagreed with the advertised timestampPeriod of ~52.083ns) nor
    * overflow ts * 1e9 for large tick counts.
    */
   return (ts / freq_hz) * 1000000000ull +
          (ts % freq_hz) * 1000000000ull / freq_hz;
}
1435
1436 static void*
tu_trace_create_ts_buffer(struct u_trace_context * utctx,uint32_t size)1437 tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
1438 {
1439 struct tu_device *device =
1440 container_of(utctx, struct tu_device, trace_context);
1441
1442 struct tu_bo *bo;
1443 tu_bo_init_new(device, &bo, size, false);
1444
1445 return bo;
1446 }
1447
1448 static void
tu_trace_destroy_ts_buffer(struct u_trace_context * utctx,void * timestamps)1449 tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
1450 {
1451 struct tu_device *device =
1452 container_of(utctx, struct tu_device, trace_context);
1453 struct tu_bo *bo = timestamps;
1454
1455 tu_bo_finish(device, bo);
1456 }
1457
/* u_trace callback: emit commands into `cs` that make the GPU write a
 * 64-bit timestamp for tracepoint `idx` into the timestamp BO.
 *
 * NOTE(review): always emits an RB_DONE_TS event with the TIMESTAMP flag;
 * the end_of_pipe argument is not consulted here.
 */
static void
tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
                   unsigned idx, bool end_of_pipe)
{
   struct tu_bo *bo = timestamps;
   struct tu_cs *ts_cs = cs;

   /* One u64 slot per tracepoint in the BO. */
   unsigned ts_offset = idx * sizeof(uint64_t);
   tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
   tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
   /* Trailing payload dword — appears unused for timestamp writes; confirm
    * against the CP_EVENT_WRITE packet definition.
    */
   tu_cs_emit(ts_cs, 0x00000000);
}
1471
/* u_trace callback: read back the GPU-written timestamp for tracepoint
 * `idx` and convert it from ticks to nanoseconds. Returns
 * U_TRACE_NO_TIMESTAMP if the BO can't be mapped or the slot was never
 * written.
 */
static uint64_t
tu_trace_read_ts(struct u_trace_context *utctx,
                 void *timestamps, unsigned idx, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = timestamps;
   struct tu_u_trace_submission_data *submission_data = flush_data;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      tu_device_wait_u_trace(device, submission_data->syncobj);
   }

   if (tu_bo_map(device, bo) != VK_SUCCESS) {
      return U_TRACE_NO_TIMESTAMP;
   }

   uint64_t *ts = bo->map;

   /* Don't translate the no-timestamp marker: */
   if (ts[idx] == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return tu_device_ticks_to_ns(device, ts[idx]);
}
1498
1499 static void
tu_trace_delete_flush_data(struct u_trace_context * utctx,void * flush_data)1500 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1501 {
1502 struct tu_device *device =
1503 container_of(utctx, struct tu_device, trace_context);
1504 struct tu_u_trace_submission_data *submission_data = flush_data;
1505
1506 tu_u_trace_submission_data_finish(device, submission_data);
1507 }
1508
/* u_trace callback used when cloning a trace: emit a CP_MEMCPY into `cs`
 * that copies `count` 64-bit timestamps from one timestamp BO to another
 * on the GPU. from_offset/to_offset are in timestamp slots (u64 units),
 * not bytes.
 */
void
tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
                         void *ts_from, uint32_t from_offset,
                         void *ts_to, uint32_t to_offset,
                         uint32_t count)
{
   struct tu_cs *cs = cmdstream;
   struct tu_bo *bo_from = ts_from;
   struct tu_bo *bo_to = ts_to;

   /* CP_MEMCPY payload (5 dwords): dword count, 64-bit source iova,
    * 64-bit destination iova.
    */
   tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
   tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
   tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
   tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
}
1524
1525 VkResult
tu_create_copy_timestamp_cs(struct tu_cmd_buffer * cmdbuf,struct tu_cs ** cs,struct u_trace ** trace_copy)1526 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
1527 struct u_trace **trace_copy)
1528 {
1529 *cs = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct tu_cs), 8,
1530 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1531
1532 if (*cs == NULL) {
1533 return VK_ERROR_OUT_OF_HOST_MEMORY;
1534 }
1535
1536 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1537 list_length(&cmdbuf->trace.trace_chunks) * 6 + 3);
1538
1539 tu_cs_begin(*cs);
1540
1541 tu_cs_emit_wfi(*cs);
1542 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1543
1544 *trace_copy = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1545 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1546
1547 if (*trace_copy == NULL) {
1548 return VK_ERROR_OUT_OF_HOST_MEMORY;
1549 }
1550
1551 u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1552 u_trace_clone_append(u_trace_begin_iterator(&cmdbuf->trace),
1553 u_trace_end_iterator(&cmdbuf->trace),
1554 *trace_copy, *cs,
1555 tu_copy_timestamp_buffer);
1556
1557 tu_cs_emit_wfi(*cs);
1558
1559 tu_cs_end(*cs);
1560
1561 return VK_SUCCESS;
1562 }
1563
/* Build the per-submission tracking data that lets u_trace read back
 * timestamps after the submission completes.
 *
 * Reusable command buffers (no ONE_TIME_SUBMIT flag) get their trace points
 * and timestamp buffer cloned, since the baked iova addresses are
 * single-use; one-time command buffers just borrow their own trace.
 *
 * Expected to be called only when at least one command buffer has trace
 * points (asserted below). On failure, everything allocated so far is
 * freed and *submission_data is set to NULL.
 */
VkResult
tu_u_trace_submission_data_create(
   struct tu_device *device,
   struct tu_cmd_buffer **cmd_buffers,
   uint32_t cmd_buffer_count,
   struct tu_u_trace_submission_data **submission_data)
{
   *submission_data =
      vk_zalloc(&device->vk.alloc,
                sizeof(struct tu_u_trace_submission_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!(*submission_data)) {
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   struct tu_u_trace_submission_data *data = *submission_data;

   data->cmd_trace_data =
      vk_zalloc(&device->vk.alloc,
                cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!data->cmd_trace_data) {
      goto fail;
   }

   data->cmd_buffer_count = cmd_buffer_count;
   data->last_buffer_with_tracepoints = -1;

   for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
      struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];

      /* Command buffers without tracepoints need no per-buffer state
       * (their zeroed entry is skipped by the finish path).
       */
      if (!u_trace_has_points(&cmdbuf->trace))
         continue;

      data->last_buffer_with_tracepoints = i;

      if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
         /* A single command buffer could be submitted several times, but we
          * already baked timestamp iova addresses and trace points are
          * single-use. Therefor we have to copy trace points and create
          * a new timestamp buffer on every submit of reusable command buffer.
          */
         if (tu_create_copy_timestamp_cs(cmdbuf,
                                         &data->cmd_trace_data[i].timestamp_copy_cs,
                                         &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
            goto fail;
         }

         assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
      } else {
         /* One-time submit: borrow the command buffer's own trace. The
          * finish path only frees entries that have a timestamp_copy_cs,
          * so this borrowed pointer is never freed here.
          */
         data->cmd_trace_data[i].trace = &cmdbuf->trace;
      }
   }

   /* Callers are expected to reach this path only when some command buffer
    * actually has tracepoints.
    */
   assert(data->last_buffer_with_tracepoints != -1);

   return VK_SUCCESS;

fail:
   tu_u_trace_submission_data_finish(device, data);
   *submission_data = NULL;

   return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
1630
1631 void
tu_u_trace_submission_data_finish(struct tu_device * device,struct tu_u_trace_submission_data * submission_data)1632 tu_u_trace_submission_data_finish(
1633 struct tu_device *device,
1634 struct tu_u_trace_submission_data *submission_data)
1635 {
1636 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1637 /* Only if we had to create a copy of trace we should free it */
1638 struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
1639 if (cmd_data->timestamp_copy_cs) {
1640 tu_cs_finish(cmd_data->timestamp_copy_cs);
1641 vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
1642
1643 u_trace_fini(cmd_data->trace);
1644 vk_free(&device->vk.alloc, cmd_data->trace);
1645 }
1646 }
1647
1648 vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
1649 vk_free(&device->vk.alloc, submission_data->syncobj);
1650 vk_free(&device->vk.alloc, submission_data);
1651 }
1652
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,
                const VkDeviceCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkDevice *pDevice)
{
   TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
   VkResult result;
   struct tu_device *device;
   /* Features from the pNext chain that change how much device state gets
    * created below.
    */
   bool custom_border_colors = false;
   bool perf_query_pools = false;
   bool robust_buffer_access2 = false;

   /* Scan the create-info chain for the feature structs we care about. */
   vk_foreach_struct_const(ext, pCreateInfo->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
         custom_border_colors = border_color_features->customBorderColors;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         perf_query_pools = feature->performanceCounterQueryPools;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         robust_buffer_access2 = features->robustBufferAccess2;
         break;
      }
      default:
         break;
      }
   }

   device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
                       sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");

   /* Driver entrypoints take precedence (overwrite = true); the WSI table
    * only fills in slots the driver left empty.
    */
   struct vk_device_dispatch_table dispatch_table;
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_device_entrypoints, true);
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk,
                           &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, device);
      return vk_startup_errorf(physical_device->instance, result,
                               "vk_device_init failed");
   }

   device->instance = physical_device->instance;
   device->physical_device = physical_device;
   device->fd = physical_device->local_fd;
   device->vk.check_status = tu_device_check_status;

   mtx_init(&device->bo_mutex, mtx_plain);
   u_rwlock_init(&device->dma_bo_lock);
   pthread_mutex_init(&device->submit_mutex, NULL);

#ifndef TU_USE_KGSL
   vk_device_set_drm_fd(&device->vk, device->fd);
#endif

   /* Create the queues the app requested, grouped by family index. */
   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create =
         &pCreateInfo->pQueueCreateInfos[i];
      uint32_t qfi = queue_create->queueFamilyIndex;
      device->queues[qfi] = vk_alloc(
         &device->vk.alloc, queue_create->queueCount * sizeof(struct tu_queue),
         8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = vk_startup_errorf(physical_device->instance,
                                    VK_ERROR_OUT_OF_HOST_MEMORY,
                                    "OOM");
         goto fail_queues;
      }

      /* Zero first so fail_queues can safely finish partially-built
       * queues (queue_count is set before any tu_queue_init can fail).
       */
      memset(device->queues[qfi], 0,
             queue_create->queueCount * sizeof(struct tu_queue));

      device->queue_count[qfi] = queue_create->queueCount;

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = tu_queue_init(device, &device->queues[qfi][q], q,
                                queue_create);
         if (result != VK_SUCCESS)
            goto fail_queues;
      }
   }

   device->compiler =
      ir3_compiler_create(NULL, &physical_device->dev_id,
                          &(struct ir3_compiler_options) {
                              .robust_ubo_access = robust_buffer_access2,
                              .push_ubo_with_preamble = true,
                          });
   if (!device->compiler) {
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "failed to initialize ir3 compiler");
      goto fail_queues;
   }

   /* Initialize sparse array for refcounting imported BOs */
   util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);

   /* initial sizes, these will increase if there is overflow */
   device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
   device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;

   /* The global BO carries driver-internal state; the custom border color
    * table is appended only when the feature was requested.
    */
   uint32_t global_size = sizeof(struct tu6_global);
   if (custom_border_colors)
      global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);

   result = tu_bo_init_new(device, &device->global_bo, global_size,
                           TU_BO_ALLOC_ALLOW_DUMP);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "BO init");
      goto fail_global_bo;
   }

   result = tu_bo_map(device, device->global_bo);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "BO map");
      goto fail_global_bo_map;
   }

   /* Pre-pack the six spec-defined border colors into the global BO. */
   struct tu6_global *global = device->global_bo->map;
   tu_init_clear_blit_shaders(device);
   global->predicate = 0;
   tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
                         &(VkClearColorValue) {}, false);
   tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK],
                         &(VkClearColorValue) {}, true);
   tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK],
                         &(VkClearColorValue) { .float32[3] = 1.0f }, false);
   tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_OPAQUE_BLACK],
                         &(VkClearColorValue) { .int32[3] = 1 }, true);
   tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE],
                         &(VkClearColorValue) { .float32[0 ... 3] = 1.0f }, false);
   tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_OPAQUE_WHITE],
                         &(VkClearColorValue) { .int32[0 ... 3] = 1 }, true);

   /* initialize to ones so ffs can be used to find unused slots */
   BITSET_ONES(device->custom_border_color);

   VkPipelineCacheCreateInfo ci;
   ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
   ci.pNext = NULL;
   ci.flags = 0;
   ci.pInitialData = NULL;
   ci.initialDataSize = 0;
   VkPipelineCache pc;
   result =
      tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "create pipeline cache failed");
      goto fail_pipeline_cache;
   }

   if (perf_query_pools) {
      /* Prepare command streams setting pass index to the PERF_CNTRS_REG
       * from 0 to 31. One of these will be picked up at cmd submit time
       * when the perf query is executed.
       */
      struct tu_cs *cs;

      if (!(device->perfcntrs_pass_cs = calloc(1, sizeof(struct tu_cs)))) {
         result = vk_startup_errorf(device->instance,
                                    VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
         goto fail_perfcntrs_pass_alloc;
      }

      device->perfcntrs_pass_cs_entries = calloc(32, sizeof(struct tu_cs_entry));
      if (!device->perfcntrs_pass_cs_entries) {
         result = vk_startup_errorf(device->instance,
                                    VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
         goto fail_perfcntrs_pass_entries_alloc;
      }

      cs = device->perfcntrs_pass_cs;
      tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96);

      /* One sub-stream per possible pass index (one bit per pass). */
      for (unsigned i = 0; i < 32; i++) {
         struct tu_cs sub_cs;

         result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
         if (result != VK_SUCCESS) {
            vk_startup_errorf(device->instance, result,
                              "failed to allocate commands streams");
            goto fail_prepare_perfcntrs_pass_cs;
         }

         tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
         tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);

         device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
      }
   }

   /* Initialize a condition variable for timeline semaphore.  The clock
    * must be CLOCK_MONOTONIC so timed waits are immune to wall-clock
    * adjustments.
    */
   pthread_condattr_t condattr;
   if (pthread_condattr_init(&condattr) != 0) {
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread condattr init");
      goto fail_timeline_cond;
   }
   if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread condattr clock setup");
      goto fail_timeline_cond;
   }
   if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread cond init");
      goto fail_timeline_cond;
   }
   pthread_condattr_destroy(&condattr);

   device->mem_cache = tu_pipeline_cache_from_handle(pc);

   result = tu_autotune_init(&device->autotune, device);
   if (result != VK_SUCCESS) {
      goto fail_timeline_cond;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
      mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);

   mtx_init(&device->mutex, mtx_plain);

   device->submit_count = 0;
   u_trace_context_init(&device->trace_context, device,
                        tu_trace_create_ts_buffer,
                        tu_trace_destroy_ts_buffer,
                        tu_trace_record_ts,
                        tu_trace_read_ts,
                        tu_trace_delete_flush_data);

   *pDevice = tu_device_to_handle(device);
   return VK_SUCCESS;

   /* Cleanup ladder: each label undoes everything acquired before the
    * corresponding failure point, in reverse order.
    */
fail_timeline_cond:
fail_prepare_perfcntrs_pass_cs:
   /* NOTE(review): fail_timeline_cond is reachable with perf_query_pools
    * == false, in which case perfcntrs_pass_cs is NULL (device was
    * zero-allocated).  free(NULL) is fine, but confirm tu_cs_finish()
    * tolerates a NULL cs.
    */
   free(device->perfcntrs_pass_cs_entries);
   tu_cs_finish(device->perfcntrs_pass_cs);
fail_perfcntrs_pass_entries_alloc:
   free(device->perfcntrs_pass_cs);
fail_perfcntrs_pass_alloc:
   tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
fail_pipeline_cache:
   tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
   tu_bo_finish(device, device->global_bo);
   vk_free(&device->vk.alloc, device->bo_list);
fail_global_bo:
   ir3_compiler_destroy(device->compiler);
   util_sparse_array_finish(&device->bo_map);

fail_queues:
   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         tu_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   u_rwlock_destroy(&device->dma_bo_lock);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
   return result;
}
1935
VKAPI_ATTR void VKAPI_CALL
tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   /* Destroying VK_NULL_HANDLE is a no-op per the Vulkan spec. */
   if (!device)
      return;

   /* Tear down trace processing first; it may still reference queues and
    * BOs destroyed below.
    */
   u_trace_context_fini(&device->trace_context);

   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         tu_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   /* Scratch BOs are created lazily (tu_get_scratch_bo); only the
    * initialized slots own a BO.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
      if (device->scratch_bos[i].initialized)
         tu_bo_finish(device, device->scratch_bos[i].bo);
   }

   tu_destroy_clear_blit_shaders(device);

   ir3_compiler_destroy(device->compiler);

   VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache);
   tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);

   /* Perf-counter pass state exists only if the perf-query feature was
    * enabled at device creation (see tu_CreateDevice).
    */
   if (device->perfcntrs_pass_cs) {
      free(device->perfcntrs_pass_cs_entries);
      tu_cs_finish(device->perfcntrs_pass_cs);
      free(device->perfcntrs_pass_cs);
   }

   tu_autotune_fini(&device->autotune, device);

   util_sparse_array_finish(&device->bo_map);
   u_rwlock_destroy(&device->dma_bo_lock);

   pthread_cond_destroy(&device->timeline_cond);
   vk_free(&device->vk.alloc, device->bo_list);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}
1981
VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
{
   /* Return a lazily-allocated scratch BO of at least `size` bytes.
    * Scratch BOs are power-of-two sized, shared between callers, and kept
    * alive until device destruction (freed in tu_DestroyDevice).
    */
   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
   assert(index < ARRAY_SIZE(dev->scratch_bos));

   /* Any already-initialized BO at this size class or larger satisfies the
    * request lock-free; the atomic read pairs with the p_atomic_set below.
    */
   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
         /* Fast path: just return the already-allocated BO. */
         *bo = dev->scratch_bos[i].bo;
         return VK_SUCCESS;
      }
   }

   /* Slow path: actually allocate the BO.  Take the per-size-class lock so
    * only one thread performs the slow allocation; other threads wanting
    * the same size block here until it is published.
    */
   mtx_lock(&dev->scratch_bos[index].construct_mtx);

   /* Another thread may have allocated it already while we were waiting on
    * the lock. We need to check this in order to avoid double-allocating.
    */
   if (dev->scratch_bos[index].initialized) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      *bo = dev->scratch_bos[index].bo;
      return VK_SUCCESS;
   }

   /* NOTE(review): bo_size is 32-bit while size_log2 derives from a 64-bit
    * size; this assumes size_log2 < 32 — confirm against the BO size range
    * bounded by ARRAY_SIZE(dev->scratch_bos).
    */
   unsigned bo_size = 1ull << size_log2;
   VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
                                    TU_BO_ALLOC_NO_FLAGS);
   if (result != VK_SUCCESS) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      return result;
   }

   /* Publish only after the BO is fully constructed so lock-free fast-path
    * readers never observe a half-initialized entry.
    */
   p_atomic_set(&dev->scratch_bos[index].initialized, true);

   mtx_unlock(&dev->scratch_bos[index].construct_mtx);

   *bo = dev->scratch_bos[index].bo;
   return VK_SUCCESS;
}
2027
2028 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceLayerProperties(uint32_t * pPropertyCount,VkLayerProperties * pProperties)2029 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2030 VkLayerProperties *pProperties)
2031 {
2032 *pPropertyCount = 0;
2033 return VK_SUCCESS;
2034 }
2035
/* Only used for kgsl since drm started using common implementation */
#ifdef TU_USE_KGSL
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueWaitIdle(VkQueue _queue)
{
   TU_FROM_HANDLE(tu_queue, queue, _queue);

   /* A lost device can never become idle again. */
   if (vk_device_is_lost(&queue->device->vk))
      return VK_ERROR_DEVICE_LOST;

   /* No fence fd means nothing is outstanding since the last wait. */
   if (queue->fence < 0)
      return VK_SUCCESS;

   /* Block until the kernel signals the last submission's fence fd,
    * restarting the wait if a signal interrupts it.
    */
   struct pollfd fds = { .fd = queue->fence, .events = POLLIN };
   int ret;
   do {
      ret = poll(&fds, 1, -1);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   /* TODO: otherwise set device lost ? */
   assert(ret == 1 && !(fds.revents & (POLLERR | POLLNVAL)));

   /* The fence fd is single-use: consume it so the next wait returns
    * immediately.
    */
   close(queue->fence);
   queue->fence = -1;
   return VK_SUCCESS;
}
#endif
2063
2064 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char * pLayerName,uint32_t * pPropertyCount,VkExtensionProperties * pProperties)2065 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2066 uint32_t *pPropertyCount,
2067 VkExtensionProperties *pProperties)
2068 {
2069 if (pLayerName)
2070 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2071
2072 return vk_enumerate_instance_extension_properties(
2073 &tu_instance_extensions_supported, pPropertyCount, pProperties);
2074 }
2075
2076 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance,const char * pName)2077 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2078 {
2079 TU_FROM_HANDLE(tu_instance, instance, _instance);
2080 return vk_instance_get_proc_addr(&instance->vk,
2081 &tu_instance_entrypoints,
2082 pName);
2083 }
2084
/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   /* Thin exported alias: the loader resolves this symbol directly and it
    * simply forwards to the driver's regular GetInstanceProcAddr.
    */
   return tu_GetInstanceProcAddr(instance, pName);
}
2098
2099 /* With version 4+ of the loader interface the ICD should expose
2100 * vk_icdGetPhysicalDeviceProcAddr()
2101 */
2102 PUBLIC
2103 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2104 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
2105 const char* pName);
2106
2107 PFN_vkVoidFunction
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,const char * pName)2108 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
2109 const char* pName)
2110 {
2111 TU_FROM_HANDLE(tu_instance, instance, _instance);
2112
2113 return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2114 }
2115
VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   if (pAllocateInfo->allocationSize == 0) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   /* Early rejection if the heap is already over budget; the precise
    * accounting for this allocation happens after the BO exists.
    */
   struct tu_memory_heap *mem_heap = &device->physical_device->heap;
   uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
   if (mem_heap_used > mem_heap->size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* A zero handleType means "no import requested" — treat it like the
    * struct was absent.
    */
   const VkImportMemoryFdInfoKHR *fd_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   if (fd_info && !fd_info->handleType)
      fd_info = NULL;

   if (fd_info) {
      assert(fd_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      /*
       * TODO Importing the same fd twice gives us the same handle without
       * reference counting. We need to maintain a per-instance handle-to-bo
       * table and add reference count to tu_bo.
       */
      result = tu_bo_init_dmabuf(device, &mem->bo,
                                 pAllocateInfo->allocationSize, fd_info->fd);
      if (result == VK_SUCCESS) {
         /* take ownership and close the fd */
         close(fd_info->fd);
      }
   } else {
      result =
         tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize,
                        TU_BO_ALLOC_NO_FLAGS);
   }


   if (result == VK_SUCCESS) {
      /* Charge the heap and re-check: if this allocation pushed usage over
       * the budget, undo the charge and free the BO.  NOTE(review): the
       * negative p_atomic_add assumes the counter negates cleanly — confirm
       * the type of mem_heap->used.
       */
      mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
      if (mem_heap_used > mem_heap->size) {
         p_atomic_add(&mem_heap->used, -mem->bo->size);
         tu_bo_finish(device, mem->bo);
         result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Out of heap memory");
      }
   }

   if (result != VK_SUCCESS) {
      vk_object_free(&device->vk, pAllocator, mem);
      return result;
   }

   /* Track in the device whether our BO list contains any implicit-sync BOs, so
    * we can suppress implicit sync on non-WSI usage.
    */
   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
   if (wsi_info && wsi_info->implicit_sync) {
      mtx_lock(&device->bo_mutex);
      if (!mem->bo->implicit_sync) {
         mem->bo->implicit_sync = true;
         device->implicit_sync_bo_count++;
      }
      mtx_unlock(&device->bo_mutex);
   }

   *pMem = tu_device_memory_to_handle(mem);

   return VK_SUCCESS;
}
2206
2207 VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,VkDeviceMemory _mem,const VkAllocationCallbacks * pAllocator)2208 tu_FreeMemory(VkDevice _device,
2209 VkDeviceMemory _mem,
2210 const VkAllocationCallbacks *pAllocator)
2211 {
2212 TU_FROM_HANDLE(tu_device, device, _device);
2213 TU_FROM_HANDLE(tu_device_memory, mem, _mem);
2214
2215 if (mem == NULL)
2216 return;
2217
2218 p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
2219 tu_bo_finish(device, mem->bo);
2220 vk_object_free(&device->vk, pAllocator, mem);
2221 }
2222
2223 VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory(VkDevice _device,VkDeviceMemory _memory,VkDeviceSize offset,VkDeviceSize size,VkMemoryMapFlags flags,void ** ppData)2224 tu_MapMemory(VkDevice _device,
2225 VkDeviceMemory _memory,
2226 VkDeviceSize offset,
2227 VkDeviceSize size,
2228 VkMemoryMapFlags flags,
2229 void **ppData)
2230 {
2231 TU_FROM_HANDLE(tu_device, device, _device);
2232 TU_FROM_HANDLE(tu_device_memory, mem, _memory);
2233 VkResult result;
2234
2235 if (mem == NULL) {
2236 *ppData = NULL;
2237 return VK_SUCCESS;
2238 }
2239
2240 if (!mem->bo->map) {
2241 result = tu_bo_map(device, mem->bo);
2242 if (result != VK_SUCCESS)
2243 return result;
2244 }
2245
2246 *ppData = mem->bo->map + offset;
2247 return VK_SUCCESS;
2248 }
2249
VKAPI_ATTR void VKAPI_CALL
tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   /* Intentionally a no-op: the CPU mapping created lazily in tu_MapMemory
    * stays alive until the memory object itself is freed.
    */
   /* TODO: unmap here instead of waiting for FreeMemory */
}
2255
VKAPI_ATTR VkResult VKAPI_CALL
tu_FlushMappedMemoryRanges(VkDevice _device,
                           uint32_t memoryRangeCount,
                           const VkMappedMemoryRange *pMemoryRanges)
{
   /* No-op.  NOTE(review): this presumes all mappable memory types are
    * host-coherent so no explicit flush is needed — confirm against the
    * memory types advertised by the physical device.
    */
   return VK_SUCCESS;
}
2263
VKAPI_ATTR VkResult VKAPI_CALL
tu_InvalidateMappedMemoryRanges(VkDevice _device,
                                uint32_t memoryRangeCount,
                                const VkMappedMemoryRange *pMemoryRanges)
{
   /* No-op, mirroring tu_FlushMappedMemoryRanges.  NOTE(review): presumes
    * host-coherent mappable memory — confirm against the advertised memory
    * types.
    */
   return VK_SUCCESS;
}
2271
2272 VKAPI_ATTR void VKAPI_CALL
tu_GetBufferMemoryRequirements2(VkDevice device,const VkBufferMemoryRequirementsInfo2 * pInfo,VkMemoryRequirements2 * pMemoryRequirements)2273 tu_GetBufferMemoryRequirements2(
2274 VkDevice device,
2275 const VkBufferMemoryRequirementsInfo2 *pInfo,
2276 VkMemoryRequirements2 *pMemoryRequirements)
2277 {
2278 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2279
2280 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2281 .memoryTypeBits = 1,
2282 .alignment = 64,
2283 .size = MAX2(align64(buffer->size, 64), buffer->size),
2284 };
2285
2286 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2287 switch (ext->sType) {
2288 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2289 VkMemoryDedicatedRequirements *req =
2290 (VkMemoryDedicatedRequirements *) ext;
2291 req->requiresDedicatedAllocation = false;
2292 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2293 break;
2294 }
2295 default:
2296 break;
2297 }
2298 }
2299 }
2300
2301 VKAPI_ATTR void VKAPI_CALL
tu_GetImageMemoryRequirements2(VkDevice device,const VkImageMemoryRequirementsInfo2 * pInfo,VkMemoryRequirements2 * pMemoryRequirements)2302 tu_GetImageMemoryRequirements2(VkDevice device,
2303 const VkImageMemoryRequirementsInfo2 *pInfo,
2304 VkMemoryRequirements2 *pMemoryRequirements)
2305 {
2306 TU_FROM_HANDLE(tu_image, image, pInfo->image);
2307
2308 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2309 .memoryTypeBits = 1,
2310 .alignment = image->layout[0].base_align,
2311 .size = image->total_size
2312 };
2313
2314 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2315 switch (ext->sType) {
2316 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2317 VkMemoryDedicatedRequirements *req =
2318 (VkMemoryDedicatedRequirements *) ext;
2319 req->requiresDedicatedAllocation = image->shareable;
2320 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2321 break;
2322 }
2323 default:
2324 break;
2325 }
2326 }
2327 }
2328
VKAPI_ATTR void VKAPI_CALL
tu_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   /* Sparse images are unimplemented; tu_stub() flags the call.
    * NOTE(review): consider also writing 0 to
    * *pSparseMemoryRequirementCount so callers see an empty result —
    * confirm tu_stub()'s behavior first.
    */
   tu_stub();
}
2338
VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,
                             VkDeviceMemory memory,
                             VkDeviceSize *pCommittedMemoryInBytes)
{
   /* Always report zero committed bytes.  NOTE(review): this presumes no
    * lazily-allocated memory types are exposed — confirm against the
    * advertised memory properties.
    */
   *pCommittedMemoryInBytes = 0;
}
2346
2347 VKAPI_ATTR VkResult VKAPI_CALL
tu_BindBufferMemory2(VkDevice device,uint32_t bindInfoCount,const VkBindBufferMemoryInfo * pBindInfos)2348 tu_BindBufferMemory2(VkDevice device,
2349 uint32_t bindInfoCount,
2350 const VkBindBufferMemoryInfo *pBindInfos)
2351 {
2352 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2353 TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2354 TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
2355
2356 if (mem) {
2357 buffer->bo = mem->bo;
2358 buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2359 } else {
2360 buffer->bo = NULL;
2361 }
2362 }
2363 return VK_SUCCESS;
2364 }
2365
2366 VKAPI_ATTR VkResult VKAPI_CALL
tu_BindImageMemory2(VkDevice device,uint32_t bindInfoCount,const VkBindImageMemoryInfo * pBindInfos)2367 tu_BindImageMemory2(VkDevice device,
2368 uint32_t bindInfoCount,
2369 const VkBindImageMemoryInfo *pBindInfos)
2370 {
2371 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2372 TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
2373 TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2374
2375 if (mem) {
2376 image->bo = mem->bo;
2377 image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2378 } else {
2379 image->bo = NULL;
2380 image->iova = 0;
2381 }
2382 }
2383
2384 return VK_SUCCESS;
2385 }
2386
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueBindSparse(VkQueue _queue,
                   uint32_t bindInfoCount,
                   const VkBindSparseInfo *pBindInfo,
                   VkFence _fence)
{
   /* Accepted as a no-op.  Sparse resources appear unsupported (see the
    * tu_GetImageSparseMemoryRequirements2 stub above), so conformant apps
    * should never reach this.
    */
   return VK_SUCCESS;
}
2395
2396 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateEvent(VkDevice _device,const VkEventCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkEvent * pEvent)2397 tu_CreateEvent(VkDevice _device,
2398 const VkEventCreateInfo *pCreateInfo,
2399 const VkAllocationCallbacks *pAllocator,
2400 VkEvent *pEvent)
2401 {
2402 TU_FROM_HANDLE(tu_device, device, _device);
2403
2404 struct tu_event *event =
2405 vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
2406 VK_OBJECT_TYPE_EVENT);
2407 if (!event)
2408 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2409
2410 VkResult result = tu_bo_init_new(device, &event->bo, 0x1000,
2411 TU_BO_ALLOC_NO_FLAGS);
2412 if (result != VK_SUCCESS)
2413 goto fail_alloc;
2414
2415 result = tu_bo_map(device, event->bo);
2416 if (result != VK_SUCCESS)
2417 goto fail_map;
2418
2419 *pEvent = tu_event_to_handle(event);
2420
2421 return VK_SUCCESS;
2422
2423 fail_map:
2424 tu_bo_finish(device, event->bo);
2425 fail_alloc:
2426 vk_object_free(&device->vk, pAllocator, event);
2427 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2428 }
2429
2430 VKAPI_ATTR void VKAPI_CALL
tu_DestroyEvent(VkDevice _device,VkEvent _event,const VkAllocationCallbacks * pAllocator)2431 tu_DestroyEvent(VkDevice _device,
2432 VkEvent _event,
2433 const VkAllocationCallbacks *pAllocator)
2434 {
2435 TU_FROM_HANDLE(tu_device, device, _device);
2436 TU_FROM_HANDLE(tu_event, event, _event);
2437
2438 if (!event)
2439 return;
2440
2441 tu_bo_finish(device, event->bo);
2442 vk_object_free(&device->vk, pAllocator, event);
2443 }
2444
2445 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetEventStatus(VkDevice _device,VkEvent _event)2446 tu_GetEventStatus(VkDevice _device, VkEvent _event)
2447 {
2448 TU_FROM_HANDLE(tu_event, event, _event);
2449
2450 if (*(uint64_t*) event->bo->map == 1)
2451 return VK_EVENT_SET;
2452 return VK_EVENT_RESET;
2453 }
2454
2455 VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device,VkEvent _event)2456 tu_SetEvent(VkDevice _device, VkEvent _event)
2457 {
2458 TU_FROM_HANDLE(tu_event, event, _event);
2459 *(uint64_t*) event->bo->map = 1;
2460
2461 return VK_SUCCESS;
2462 }
2463
2464 VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device,VkEvent _event)2465 tu_ResetEvent(VkDevice _device, VkEvent _event)
2466 {
2467 TU_FROM_HANDLE(tu_event, event, _event);
2468 *(uint64_t*) event->bo->map = 0;
2469
2470 return VK_SUCCESS;
2471 }
2472
2473 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBuffer(VkDevice _device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer)2474 tu_CreateBuffer(VkDevice _device,
2475 const VkBufferCreateInfo *pCreateInfo,
2476 const VkAllocationCallbacks *pAllocator,
2477 VkBuffer *pBuffer)
2478 {
2479 TU_FROM_HANDLE(tu_device, device, _device);
2480 struct tu_buffer *buffer;
2481
2482 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2483
2484 buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
2485 VK_OBJECT_TYPE_BUFFER);
2486 if (buffer == NULL)
2487 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2488
2489 buffer->size = pCreateInfo->size;
2490 buffer->usage = pCreateInfo->usage;
2491 buffer->flags = pCreateInfo->flags;
2492
2493 *pBuffer = tu_buffer_to_handle(buffer);
2494
2495 return VK_SUCCESS;
2496 }
2497
2498 VKAPI_ATTR void VKAPI_CALL
tu_DestroyBuffer(VkDevice _device,VkBuffer _buffer,const VkAllocationCallbacks * pAllocator)2499 tu_DestroyBuffer(VkDevice _device,
2500 VkBuffer _buffer,
2501 const VkAllocationCallbacks *pAllocator)
2502 {
2503 TU_FROM_HANDLE(tu_device, device, _device);
2504 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
2505
2506 if (!buffer)
2507 return;
2508
2509 vk_object_free(&device->vk, pAllocator, buffer);
2510 }
2511
2512 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,const VkFramebufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFramebuffer * pFramebuffer)2513 tu_CreateFramebuffer(VkDevice _device,
2514 const VkFramebufferCreateInfo *pCreateInfo,
2515 const VkAllocationCallbacks *pAllocator,
2516 VkFramebuffer *pFramebuffer)
2517 {
2518 TU_FROM_HANDLE(tu_device, device, _device);
2519 TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
2520 struct tu_framebuffer *framebuffer;
2521
2522 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2523
2524 bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
2525
2526 size_t size = sizeof(*framebuffer);
2527 if (!imageless)
2528 size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
2529 framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
2530 VK_OBJECT_TYPE_FRAMEBUFFER);
2531 if (framebuffer == NULL)
2532 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2533
2534 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2535 framebuffer->width = pCreateInfo->width;
2536 framebuffer->height = pCreateInfo->height;
2537 framebuffer->layers = pCreateInfo->layers;
2538
2539 if (!imageless) {
2540 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2541 VkImageView _iview = pCreateInfo->pAttachments[i];
2542 struct tu_image_view *iview = tu_image_view_from_handle(_iview);
2543 framebuffer->attachments[i].attachment = iview;
2544 }
2545 }
2546
2547 tu_framebuffer_tiling_config(framebuffer, device, pass);
2548
2549 *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
2550 return VK_SUCCESS;
2551 }
2552
2553 VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,VkFramebuffer _fb,const VkAllocationCallbacks * pAllocator)2554 tu_DestroyFramebuffer(VkDevice _device,
2555 VkFramebuffer _fb,
2556 const VkAllocationCallbacks *pAllocator)
2557 {
2558 TU_FROM_HANDLE(tu_device, device, _device);
2559 TU_FROM_HANDLE(tu_framebuffer, fb, _fb);
2560
2561 if (!fb)
2562 return;
2563
2564 vk_object_free(&device->vk, pAllocator, fb);
2565 }
2566
2567 static void
tu_init_sampler(struct tu_device * device,struct tu_sampler * sampler,const VkSamplerCreateInfo * pCreateInfo)2568 tu_init_sampler(struct tu_device *device,
2569 struct tu_sampler *sampler,
2570 const VkSamplerCreateInfo *pCreateInfo)
2571 {
2572 const struct VkSamplerReductionModeCreateInfo *reduction =
2573 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
2574 const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
2575 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
2576 const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
2577 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
2578 /* for non-custom border colors, the VK enum is translated directly to an offset in
2579 * the border color buffer. custom border colors are located immediately after the
2580 * builtin colors, and thus an offset of TU_BORDER_COLOR_BUILTIN is added.
2581 */
2582 uint32_t border_color = (unsigned) pCreateInfo->borderColor;
2583 if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
2584 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
2585 mtx_lock(&device->mutex);
2586 border_color = BITSET_FFS(device->custom_border_color);
2587 BITSET_CLEAR(device->custom_border_color, border_color);
2588 mtx_unlock(&device->mutex);
2589 tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
2590 &custom_border_color->customBorderColor,
2591 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
2592 border_color += TU_BORDER_COLOR_BUILTIN;
2593 }
2594
2595 unsigned aniso = pCreateInfo->anisotropyEnable ?
2596 util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
2597 bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
2598 float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
2599 float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);
2600
2601 sampler->descriptor[0] =
2602 COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
2603 A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
2604 A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
2605 A6XX_TEX_SAMP_0_ANISO(aniso) |
2606 A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
2607 A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
2608 A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
2609 A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
2610 sampler->descriptor[1] =
2611 /* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
2612 COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
2613 A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
2614 A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
2615 COND(pCreateInfo->compareEnable,
2616 A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
2617 sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color);
2618 sampler->descriptor[3] = 0;
2619
2620 if (reduction) {
2621 sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
2622 tu6_reduction_mode(reduction->reductionMode));
2623 }
2624
2625 sampler->ycbcr_sampler = ycbcr_conversion ?
2626 tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
2627
2628 if (sampler->ycbcr_sampler &&
2629 sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
2630 sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
2631 }
2632
2633 /* TODO:
2634 * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
2635 */
2636 }
2637
2638 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSampler(VkDevice _device,const VkSamplerCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSampler * pSampler)2639 tu_CreateSampler(VkDevice _device,
2640 const VkSamplerCreateInfo *pCreateInfo,
2641 const VkAllocationCallbacks *pAllocator,
2642 VkSampler *pSampler)
2643 {
2644 TU_FROM_HANDLE(tu_device, device, _device);
2645 struct tu_sampler *sampler;
2646
2647 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2648
2649 sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
2650 VK_OBJECT_TYPE_SAMPLER);
2651 if (!sampler)
2652 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2653
2654 tu_init_sampler(device, sampler, pCreateInfo);
2655 *pSampler = tu_sampler_to_handle(sampler);
2656
2657 return VK_SUCCESS;
2658 }
2659
2660 VKAPI_ATTR void VKAPI_CALL
tu_DestroySampler(VkDevice _device,VkSampler _sampler,const VkAllocationCallbacks * pAllocator)2661 tu_DestroySampler(VkDevice _device,
2662 VkSampler _sampler,
2663 const VkAllocationCallbacks *pAllocator)
2664 {
2665 TU_FROM_HANDLE(tu_device, device, _device);
2666 TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
2667 uint32_t border_color;
2668
2669 if (!sampler)
2670 return;
2671
2672 border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >> A6XX_TEX_SAMP_2_BCOLOR__SHIFT;
2673 if (border_color >= TU_BORDER_COLOR_BUILTIN) {
2674 border_color -= TU_BORDER_COLOR_BUILTIN;
2675 /* if the sampler had a custom border color, free it. TODO: no lock */
2676 mtx_lock(&device->mutex);
2677 assert(!BITSET_TEST(device->custom_border_color, border_color));
2678 BITSET_SET(device->custom_border_color, border_color);
2679 mtx_unlock(&device->mutex);
2680 }
2681
2682 vk_object_free(&device->vk, pAllocator, sampler);
2683 }
2684
2685 /* vk_icd.h does not declare this function, so we declare it here to
2686 * suppress Wmissing-prototypes.
2687 */
2688 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2689 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2690
2691 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t * pSupportedVersion)2692 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2693 {
2694 /* For the full details on loader interface versioning, see
2695 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2696 * What follows is a condensed summary, to help you navigate the large and
2697 * confusing official doc.
2698 *
2699 * - Loader interface v0 is incompatible with later versions. We don't
2700 * support it.
2701 *
2702 * - In loader interface v1:
2703 * - The first ICD entrypoint called by the loader is
2704 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2705 * entrypoint.
2706 * - The ICD must statically expose no other Vulkan symbol unless it
2707 * is linked with -Bsymbolic.
2708 * - Each dispatchable Vulkan handle created by the ICD must be
2709 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2710 * ICD must initialize VK_LOADER_DATA.loadMagic to
2711 * ICD_LOADER_MAGIC.
2712 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2713 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2714 * such loader-managed surfaces.
2715 *
2716 * - Loader interface v2 differs from v1 in:
2717 * - The first ICD entrypoint called by the loader is
2718 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2719 * statically expose this entrypoint.
2720 *
2721 * - Loader interface v3 differs from v2 in:
2722 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2723 * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
2724 * because the loader no longer does so.
2725 *
2726 * - Loader interface v4 differs from v3 in:
2727 * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
2728 *
2729 * - Loader interface v5 differs from v4 in:
2730 * - The ICD must support Vulkan API version 1.1 and must not return
2731 * VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
2732 * Vulkan Loader with interface v4 or smaller is being used and the
2733 * application provides an API version that is greater than 1.0.
2734 */
2735 *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
2736 return VK_SUCCESS;
2737 }
2738
2739 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,const VkMemoryGetFdInfoKHR * pGetFdInfo,int * pFd)2740 tu_GetMemoryFdKHR(VkDevice _device,
2741 const VkMemoryGetFdInfoKHR *pGetFdInfo,
2742 int *pFd)
2743 {
2744 TU_FROM_HANDLE(tu_device, device, _device);
2745 TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
2746
2747 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2748
2749 /* At the moment, we support only the below handle types. */
2750 assert(pGetFdInfo->handleType ==
2751 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2752 pGetFdInfo->handleType ==
2753 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2754
2755 int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
2756 if (prime_fd < 0)
2757 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2758
2759 *pFd = prime_fd;
2760 return VK_SUCCESS;
2761 }
2762
2763 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,VkExternalMemoryHandleTypeFlagBits handleType,int fd,VkMemoryFdPropertiesKHR * pMemoryFdProperties)2764 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
2765 VkExternalMemoryHandleTypeFlagBits handleType,
2766 int fd,
2767 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2768 {
2769 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2770 pMemoryFdProperties->memoryTypeBits = 1;
2771 return VK_SUCCESS;
2772 }
2773
2774 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceExternalFenceProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalFenceInfo * pExternalFenceInfo,VkExternalFenceProperties * pExternalFenceProperties)2775 tu_GetPhysicalDeviceExternalFenceProperties(
2776 VkPhysicalDevice physicalDevice,
2777 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
2778 VkExternalFenceProperties *pExternalFenceProperties)
2779 {
2780 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
2781 pExternalFenceProperties->compatibleHandleTypes = 0;
2782 pExternalFenceProperties->externalFenceFeatures = 0;
2783 }
2784
2785 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceGroupPeerMemoryFeatures(VkDevice device,uint32_t heapIndex,uint32_t localDeviceIndex,uint32_t remoteDeviceIndex,VkPeerMemoryFeatureFlags * pPeerMemoryFeatures)2786 tu_GetDeviceGroupPeerMemoryFeatures(
2787 VkDevice device,
2788 uint32_t heapIndex,
2789 uint32_t localDeviceIndex,
2790 uint32_t remoteDeviceIndex,
2791 VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
2792 {
2793 assert(localDeviceIndex == remoteDeviceIndex);
2794
2795 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2796 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2797 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2798 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2799 }
2800
2801 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)2802 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
2803 VkPhysicalDevice physicalDevice,
2804 VkSampleCountFlagBits samples,
2805 VkMultisamplePropertiesEXT* pMultisampleProperties)
2806 {
2807 TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
2808
2809 if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
2810 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
2811 else
2812 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
2813 }
2814
2815 VkDeviceAddress
tu_GetBufferDeviceAddress(VkDevice _device,const VkBufferDeviceAddressInfoKHR * pInfo)2816 tu_GetBufferDeviceAddress(VkDevice _device,
2817 const VkBufferDeviceAddressInfoKHR* pInfo)
2818 {
2819 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2820
2821 return buffer->iova;
2822 }
2823
tu_GetBufferOpaqueCaptureAddress(VkDevice device,const VkBufferDeviceAddressInfoKHR * pInfo)2824 uint64_t tu_GetBufferOpaqueCaptureAddress(
2825 VkDevice device,
2826 const VkBufferDeviceAddressInfoKHR* pInfo)
2827 {
2828 tu_stub();
2829 return 0;
2830 }
2831
tu_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,const VkDeviceMemoryOpaqueCaptureAddressInfoKHR * pInfo)2832 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
2833 VkDevice device,
2834 const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo)
2835 {
2836 tu_stub();
2837 return 0;
2838 }
2839