1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25  * DEALINGS IN THE SOFTWARE.
26  */
27 
28 #include "tu_private.h"
29 #include "tu_cs.h"
30 #include "git_sha1.h"
31 
32 #include <fcntl.h>
33 #include <poll.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <sys/sysinfo.h>
37 #include <unistd.h>
38 
39 #include "util/debug.h"
40 #include "util/disk_cache.h"
41 #include "util/driconf.h"
42 #include "util/os_misc.h"
43 #include "util/u_atomic.h"
44 #include "vk_format.h"
45 #include "vk_util.h"
46 
47 /* for fd_get_driver/device_uuid() */
48 #include "freedreno/common/freedreno_uuid.h"
49 
50 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
51      defined(VK_USE_PLATFORM_XCB_KHR) || \
52      defined(VK_USE_PLATFORM_XLIB_KHR) || \
53      defined(VK_USE_PLATFORM_DISPLAY_KHR)
54 #define TU_HAS_SURFACE 1
55 #else
56 #define TU_HAS_SURFACE 0
57 #endif
58 
59 
60 static int
tu_device_get_cache_uuid(uint16_t family,void * uuid)61 tu_device_get_cache_uuid(uint16_t family, void *uuid)
62 {
63    uint32_t mesa_timestamp;
64    uint16_t f = family;
65    memset(uuid, 0, VK_UUID_SIZE);
66    if (!disk_cache_get_function_timestamp(tu_device_get_cache_uuid,
67                                           &mesa_timestamp))
68       return -1;
69 
70    memcpy(uuid, &mesa_timestamp, 4);
71    memcpy((char *) uuid + 4, &f, 2);
72    snprintf((char *) uuid + 6, VK_UUID_SIZE - 10, "tu");
73    return 0;
74 }
75 
76 #define TU_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
77 
78 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t * pApiVersion)79 tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
80 {
81     *pApiVersion = TU_API_VERSION;
82     return VK_SUCCESS;
83 }
84 
85 static const struct vk_instance_extension_table tu_instance_extensions_supported = {
86    .KHR_device_group_creation           = true,
87    .KHR_external_fence_capabilities     = true,
88    .KHR_external_memory_capabilities    = true,
89    .KHR_external_semaphore_capabilities = true,
90    .KHR_get_physical_device_properties2 = true,
91    .KHR_surface                         = TU_HAS_SURFACE,
92    .KHR_get_surface_capabilities2       = TU_HAS_SURFACE,
93    .EXT_debug_report                    = true,
94 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
95    .KHR_wayland_surface                 = true,
96 #endif
97 #ifdef VK_USE_PLATFORM_XCB_KHR
98    .KHR_xcb_surface                     = true,
99 #endif
100 #ifdef VK_USE_PLATFORM_XLIB_KHR
101    .KHR_xlib_surface                    = true,
102 #endif
103 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
104    .EXT_acquire_xlib_display            = true,
105 #endif
106 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
107    .KHR_display                         = true,
108    .KHR_get_display_properties2         = true,
109    .EXT_direct_mode_display             = true,
110    .EXT_display_surface_counter         = true,
111 #endif
112 };
113 
114 static void
get_device_extensions(const struct tu_physical_device * device,struct vk_device_extension_table * ext)115 get_device_extensions(const struct tu_physical_device *device,
116                       struct vk_device_extension_table *ext)
117 {
118    *ext = (struct vk_device_extension_table) {
119       .KHR_16bit_storage = device->info->a6xx.storage_16bit,
120       .KHR_bind_memory2 = true,
121       .KHR_copy_commands2 = true,
122       .KHR_create_renderpass2 = true,
123       .KHR_dedicated_allocation = true,
124       .KHR_depth_stencil_resolve = true,
125       .KHR_descriptor_update_template = true,
126       .KHR_device_group = true,
127       .KHR_draw_indirect_count = true,
128       .KHR_external_fence = true,
129       .KHR_external_fence_fd = true,
130       .KHR_external_memory = true,
131       .KHR_external_memory_fd = true,
132       .KHR_external_semaphore = true,
133       .KHR_external_semaphore_fd = true,
134       .KHR_get_memory_requirements2 = true,
135       .KHR_imageless_framebuffer = true,
136       .KHR_incremental_present = TU_HAS_SURFACE,
137       .KHR_image_format_list = true,
138       .KHR_maintenance1 = true,
139       .KHR_maintenance2 = true,
140       .KHR_maintenance3 = true,
141       .KHR_multiview = true,
142       .KHR_performance_query = device->instance->debug_flags & TU_DEBUG_PERFC,
143       .KHR_pipeline_executable_properties = true,
144       .KHR_push_descriptor = true,
145       .KHR_relaxed_block_layout = true,
146       .KHR_sampler_mirror_clamp_to_edge = true,
147       .KHR_sampler_ycbcr_conversion = true,
148       .KHR_shader_draw_parameters = true,
149       .KHR_shader_float_controls = true,
150       .KHR_shader_float16_int8 = true,
151       .KHR_shader_subgroup_extended_types = true,
152       .KHR_shader_terminate_invocation = true,
153       .KHR_spirv_1_4 = true,
154       .KHR_storage_buffer_storage_class = true,
155       .KHR_swapchain = TU_HAS_SURFACE,
156       .KHR_uniform_buffer_standard_layout = true,
157       .KHR_variable_pointers = true,
158       .KHR_vulkan_memory_model = true,
159       .KHR_driver_properties = true,
160       .KHR_separate_depth_stencil_layouts = true,
161       .KHR_buffer_device_address = true,
162       .KHR_shader_integer_dot_product = true,
163       .KHR_zero_initialize_workgroup_memory = true,
164       .KHR_shader_non_semantic_info = true,
165 #ifndef TU_USE_KGSL
166       .KHR_timeline_semaphore = true,
167 #endif
168 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
169       .EXT_display_control = true,
170 #endif
171       .EXT_external_memory_dma_buf = true,
172       .EXT_image_drm_format_modifier = true,
173       .EXT_sample_locations = device->info->a6xx.has_sample_locations,
174       .EXT_sampler_filter_minmax = true,
175       .EXT_transform_feedback = true,
176       .EXT_4444_formats = true,
177       .EXT_conditional_rendering = true,
178       .EXT_custom_border_color = true,
179       .EXT_depth_clip_control = true,
180       .EXT_depth_clip_enable = true,
181       .EXT_descriptor_indexing = true,
182       .EXT_extended_dynamic_state = true,
183       .EXT_extended_dynamic_state2 = true,
184       .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
185       .EXT_host_query_reset = true,
186       .EXT_index_type_uint8 = true,
187       .EXT_memory_budget = true,
188       .EXT_primitive_topology_list_restart = true,
189       .EXT_private_data = true,
190       .EXT_queue_family_foreign = true,
191       .EXT_robustness2 = true,
192       .EXT_scalar_block_layout = true,
193       .EXT_separate_stencil_usage = true,
194       .EXT_shader_demote_to_helper_invocation = true,
195       .EXT_shader_stencil_export = true,
196       .EXT_shader_viewport_index_layer = true,
197       .EXT_texel_buffer_alignment = true,
198       .EXT_vertex_attribute_divisor = true,
199       .EXT_provoking_vertex = true,
200       .EXT_line_rasterization = true,
201       .EXT_subgroup_size_control = true,
202       .EXT_image_robustness = true,
203 #ifndef TU_USE_KGSL
204       .EXT_physical_device_drm = true,
205 #endif
206       /* For Graphics Flight Recorder (GFR) */
207       .AMD_buffer_marker = true,
208       .ARM_rasterization_order_attachment_access = true,
209 #ifdef ANDROID
210       .ANDROID_native_buffer = true,
211 #endif
212       .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
213       .VALVE_mutable_descriptor_type = true,
214    };
215 }
216 
217 VkResult
tu_physical_device_init(struct tu_physical_device * device,struct tu_instance * instance)218 tu_physical_device_init(struct tu_physical_device *device,
219                         struct tu_instance *instance)
220 {
221    VkResult result = VK_SUCCESS;
222 
223    const char *fd_name = fd_dev_name(&device->dev_id);
224    if (strncmp(fd_name, "FD", 2) == 0) {
225       device->name = vk_asprintf(&instance->vk.alloc,
226                                  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
227                                  "Turnip Adreno (TM) %s", &fd_name[2]);
228    } else {
229       device->name = vk_strdup(&instance->vk.alloc, fd_name,
230                                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
231 
232    }
233    if (!device->name) {
234       return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
235                                "device name alloc fail");
236    }
237 
238    const struct fd_dev_info *info = fd_dev_info(&device->dev_id);
239    if (!info) {
240       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
241                                  "device %s is unsupported", device->name);
242       goto fail_free_name;
243    }
244    switch (fd_dev_gen(&device->dev_id)) {
245    case 6:
246       device->info = info;
247       device->ccu_offset_bypass = device->info->num_ccu * A6XX_CCU_DEPTH_SIZE;
248       device->ccu_offset_gmem = (device->gmem_size -
249          device->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
250       break;
251    default:
252       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
253                                  "device %s is unsupported", device->name);
254       goto fail_free_name;
255    }
256    if (tu_device_get_cache_uuid(fd_dev_gpu_id(&device->dev_id), device->cache_uuid)) {
257       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
258                                  "cannot generate UUID");
259       goto fail_free_name;
260    }
261 
262    /* The gpu id is already embedded in the uuid so we just pass "tu"
263     * when creating the cache.
264     */
265    char buf[VK_UUID_SIZE * 2 + 1];
266    disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
267    device->disk_cache = disk_cache_create(device->name, buf, 0);
268 
269    fd_get_driver_uuid(device->driver_uuid);
270    fd_get_device_uuid(device->device_uuid, &device->dev_id);
271 
272    struct vk_device_extension_table supported_extensions;
273    get_device_extensions(device, &supported_extensions);
274 
275    struct vk_physical_device_dispatch_table dispatch_table;
276    vk_physical_device_dispatch_table_from_entrypoints(
277       &dispatch_table, &tu_physical_device_entrypoints, true);
278    vk_physical_device_dispatch_table_from_entrypoints(
279       &dispatch_table, &wsi_physical_device_entrypoints, false);
280 
281    result = vk_physical_device_init(&device->vk, &instance->vk,
282                                     &supported_extensions,
283                                     &dispatch_table);
284    if (result != VK_SUCCESS)
285       goto fail_free_cache;
286 
287 #if TU_HAS_SURFACE
288    result = tu_wsi_init(device);
289    if (result != VK_SUCCESS) {
290       vk_startup_errorf(instance, result, "WSI init failure");
291       vk_physical_device_finish(&device->vk);
292       goto fail_free_cache;
293    }
294 #endif
295 
296    return VK_SUCCESS;
297 
298 fail_free_cache:
299    disk_cache_destroy(device->disk_cache);
300 fail_free_name:
301    vk_free(&instance->vk.alloc, (void *)device->name);
302    return result;
303 }
304 
305 static void
tu_physical_device_finish(struct tu_physical_device * device)306 tu_physical_device_finish(struct tu_physical_device *device)
307 {
308 #if TU_HAS_SURFACE
309    tu_wsi_finish(device);
310 #endif
311 
312    disk_cache_destroy(device->disk_cache);
313    close(device->local_fd);
314    if (device->master_fd != -1)
315       close(device->master_fd);
316 
317    vk_free(&device->instance->vk.alloc, (void *)device->name);
318 
319    vk_physical_device_finish(&device->vk);
320 }
321 
322 static const struct debug_control tu_debug_options[] = {
323    { "startup", TU_DEBUG_STARTUP },
324    { "nir", TU_DEBUG_NIR },
325    { "nobin", TU_DEBUG_NOBIN },
326    { "sysmem", TU_DEBUG_SYSMEM },
327    { "gmem", TU_DEBUG_GMEM },
328    { "forcebin", TU_DEBUG_FORCEBIN },
329    { "noubwc", TU_DEBUG_NOUBWC },
330    { "nomultipos", TU_DEBUG_NOMULTIPOS },
331    { "nolrz", TU_DEBUG_NOLRZ },
332    { "perfc", TU_DEBUG_PERFC },
333    { "flushall", TU_DEBUG_FLUSHALL },
334    { "syncdraw", TU_DEBUG_SYNCDRAW },
335    { "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD },
336    { "rast_order", TU_DEBUG_RAST_ORDER },
337    { NULL, 0 }
338 };
339 
340 const char *
tu_get_debug_option_name(int id)341 tu_get_debug_option_name(int id)
342 {
343    assert(id < ARRAY_SIZE(tu_debug_options) - 1);
344    return tu_debug_options[id].string;
345 }
346 
347 static const driOptionDescription tu_dri_options[] = {
348    DRI_CONF_SECTION_PERFORMANCE
349       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
350       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
351       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
352       DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
353    DRI_CONF_SECTION_END
354 
355    DRI_CONF_SECTION_DEBUG
356       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
357       DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
358    DRI_CONF_SECTION_END
359 };
360 
361 static void
tu_init_dri_options(struct tu_instance * instance)362 tu_init_dri_options(struct tu_instance *instance)
363 {
364    driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
365                       ARRAY_SIZE(tu_dri_options));
366    driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
367                        instance->vk.app_info.app_name, instance->vk.app_info.app_version,
368                        instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
369 
370    if (driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load"))
371       instance->debug_flags |= TU_DEBUG_DONT_CARE_AS_LOAD;
372 }
373 
374 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkInstance * pInstance)375 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
376                   const VkAllocationCallbacks *pAllocator,
377                   VkInstance *pInstance)
378 {
379    struct tu_instance *instance;
380    VkResult result;
381 
382    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
383 
384    if (pAllocator == NULL)
385       pAllocator = vk_default_allocator();
386 
387    instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
388                         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
389 
390    if (!instance)
391       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
392 
393    struct vk_instance_dispatch_table dispatch_table;
394    vk_instance_dispatch_table_from_entrypoints(
395       &dispatch_table, &tu_instance_entrypoints, true);
396    vk_instance_dispatch_table_from_entrypoints(
397       &dispatch_table, &wsi_instance_entrypoints, false);
398 
399    result = vk_instance_init(&instance->vk,
400                              &tu_instance_extensions_supported,
401                              &dispatch_table,
402                              pCreateInfo, pAllocator);
403    if (result != VK_SUCCESS) {
404       vk_free(pAllocator, instance);
405       return vk_error(NULL, result);
406    }
407 
408    instance->physical_device_count = -1;
409 
410    instance->debug_flags =
411       parse_debug_string(os_get_option("TU_DEBUG"), tu_debug_options);
412 
413 #ifdef DEBUG
414    /* Enable startup debugging by default on debug drivers.  You almost always
415     * want to see your startup failures in that case, and it's hard to set
416     * this env var on android.
417     */
418    instance->debug_flags |= TU_DEBUG_STARTUP;
419 #endif
420 
421    if (instance->debug_flags & TU_DEBUG_STARTUP)
422       mesa_logi("Created an instance");
423 
424    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
425 
426    tu_init_dri_options(instance);
427 
428    *pInstance = tu_instance_to_handle(instance);
429 
430 #ifdef HAVE_PERFETTO
431    tu_perfetto_init();
432 #endif
433 
434    return VK_SUCCESS;
435 }
436 
437 VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,const VkAllocationCallbacks * pAllocator)438 tu_DestroyInstance(VkInstance _instance,
439                    const VkAllocationCallbacks *pAllocator)
440 {
441    TU_FROM_HANDLE(tu_instance, instance, _instance);
442 
443    if (!instance)
444       return;
445 
446    for (int i = 0; i < instance->physical_device_count; ++i) {
447       tu_physical_device_finish(instance->physical_devices + i);
448    }
449 
450    VG(VALGRIND_DESTROY_MEMPOOL(instance));
451 
452    driDestroyOptionCache(&instance->dri_options);
453    driDestroyOptionInfo(&instance->available_dri_options);
454 
455    vk_instance_finish(&instance->vk);
456    vk_free(&instance->vk.alloc, instance);
457 }
458 
459 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDevices(VkInstance _instance,uint32_t * pPhysicalDeviceCount,VkPhysicalDevice * pPhysicalDevices)460 tu_EnumeratePhysicalDevices(VkInstance _instance,
461                             uint32_t *pPhysicalDeviceCount,
462                             VkPhysicalDevice *pPhysicalDevices)
463 {
464    TU_FROM_HANDLE(tu_instance, instance, _instance);
465    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
466                           pPhysicalDevices, pPhysicalDeviceCount);
467 
468    VkResult result;
469 
470    if (instance->physical_device_count < 0) {
471       result = tu_enumerate_devices(instance);
472       if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
473          return result;
474    }
475 
476    for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
477       vk_outarray_append_typed(VkPhysicalDevice, &out, p)
478       {
479          *p = tu_physical_device_to_handle(instance->physical_devices + i);
480       }
481    }
482 
483    return vk_outarray_status(&out);
484 }
485 
486 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDeviceGroups(VkInstance _instance,uint32_t * pPhysicalDeviceGroupCount,VkPhysicalDeviceGroupProperties * pPhysicalDeviceGroupProperties)487 tu_EnumeratePhysicalDeviceGroups(
488    VkInstance _instance,
489    uint32_t *pPhysicalDeviceGroupCount,
490    VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
491 {
492    TU_FROM_HANDLE(tu_instance, instance, _instance);
493    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
494                           pPhysicalDeviceGroupProperties,
495                           pPhysicalDeviceGroupCount);
496    VkResult result;
497 
498    if (instance->physical_device_count < 0) {
499       result = tu_enumerate_devices(instance);
500       if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
501          return result;
502    }
503 
504    for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
505       vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
506       {
507          p->physicalDeviceCount = 1;
508          p->physicalDevices[0] =
509             tu_physical_device_to_handle(instance->physical_devices + i);
510          p->subsetAllocation = false;
511       }
512    }
513 
514    return vk_outarray_status(&out);
515 }
516 
517 static void
tu_get_physical_device_features_1_1(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan11Features * features)518 tu_get_physical_device_features_1_1(struct tu_physical_device *pdevice,
519                                     VkPhysicalDeviceVulkan11Features *features)
520 {
521    features->storageBuffer16BitAccess            = pdevice->info->a6xx.storage_16bit;
522    features->uniformAndStorageBuffer16BitAccess  = false;
523    features->storagePushConstant16               = false;
524    features->storageInputOutput16                = false;
525    features->multiview                           = true;
526    features->multiviewGeometryShader             = false;
527    features->multiviewTessellationShader         = false;
528    features->variablePointersStorageBuffer       = true;
529    features->variablePointers                    = true;
530    features->protectedMemory                     = false;
531    features->samplerYcbcrConversion              = true;
532    features->shaderDrawParameters                = true;
533 }
534 
535 static void
tu_get_physical_device_features_1_2(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan12Features * features)536 tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
537                                     VkPhysicalDeviceVulkan12Features *features)
538 {
539    features->samplerMirrorClampToEdge            = true;
540    features->drawIndirectCount                   = true;
541    features->storageBuffer8BitAccess             = false;
542    features->uniformAndStorageBuffer8BitAccess   = false;
543    features->storagePushConstant8                = false;
544    features->shaderBufferInt64Atomics            = false;
545    features->shaderSharedInt64Atomics            = false;
546    features->shaderFloat16                       = true;
547    features->shaderInt8                          = false;
548 
549    features->descriptorIndexing                                 = true;
550    features->shaderInputAttachmentArrayDynamicIndexing          = false;
551    features->shaderUniformTexelBufferArrayDynamicIndexing       = true;
552    features->shaderStorageTexelBufferArrayDynamicIndexing       = true;
553    features->shaderUniformBufferArrayNonUniformIndexing         = true;
554    features->shaderSampledImageArrayNonUniformIndexing          = true;
555    features->shaderStorageBufferArrayNonUniformIndexing         = true;
556    features->shaderStorageImageArrayNonUniformIndexing          = true;
557    features->shaderInputAttachmentArrayNonUniformIndexing       = false;
558    features->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
559    features->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
560    features->descriptorBindingUniformBufferUpdateAfterBind      = true;
561    features->descriptorBindingSampledImageUpdateAfterBind       = true;
562    features->descriptorBindingStorageImageUpdateAfterBind       = true;
563    features->descriptorBindingStorageBufferUpdateAfterBind      = true;
564    features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
565    features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
566    features->descriptorBindingUpdateUnusedWhilePending          = true;
567    features->descriptorBindingPartiallyBound                    = true;
568    features->descriptorBindingVariableDescriptorCount           = true;
569    features->runtimeDescriptorArray                             = true;
570 
571    features->samplerFilterMinmax                 = true;
572    features->scalarBlockLayout                   = true;
573    features->imagelessFramebuffer                = true;
574    features->uniformBufferStandardLayout         = true;
575    features->shaderSubgroupExtendedTypes         = true;
576    features->separateDepthStencilLayouts         = true;
577    features->hostQueryReset                      = true;
578    features->timelineSemaphore                   = true;
579    features->bufferDeviceAddress                 = true;
580    features->bufferDeviceAddressCaptureReplay    = false;
581    features->bufferDeviceAddressMultiDevice      = false;
582    features->vulkanMemoryModel                   = true;
583    features->vulkanMemoryModelDeviceScope        = true;
584    features->vulkanMemoryModelAvailabilityVisibilityChains = true;
585    features->shaderOutputViewportIndex           = true;
586    features->shaderOutputLayer                   = true;
587    features->subgroupBroadcastDynamicId          = true;
588 }
589 
590 static void
tu_get_physical_device_features_1_3(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan13Features * features)591 tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice,
592                                     VkPhysicalDeviceVulkan13Features *features)
593 {
594    features->robustImageAccess                   = true;
595    features->inlineUniformBlock                  = false;
596    features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
597    features->pipelineCreationCacheControl        = false;
598    features->privateData                         = true;
599    features->shaderDemoteToHelperInvocation      = true;
600    features->shaderTerminateInvocation           = true;
601    features->subgroupSizeControl                 = true;
602    features->computeFullSubgroups                = true;
603    features->synchronization2                    = false;
604    features->textureCompressionASTC_HDR          = false;
605    features->shaderZeroInitializeWorkgroupMemory = true;
606    features->dynamicRendering                    = false;
607    features->shaderIntegerDotProduct             = true;
608    features->maintenance4                        = false;
609 }
610 
611 void
tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceFeatures2 * pFeatures)612 tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
613                               VkPhysicalDeviceFeatures2 *pFeatures)
614 {
615    TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
616 
617    pFeatures->features = (VkPhysicalDeviceFeatures) {
618       .robustBufferAccess = true,
619       .fullDrawIndexUint32 = true,
620       .imageCubeArray = true,
621       .independentBlend = true,
622       .geometryShader = true,
623       .tessellationShader = true,
624       .sampleRateShading = true,
625       .dualSrcBlend = true,
626       .logicOp = true,
627       .multiDrawIndirect = true,
628       .drawIndirectFirstInstance = true,
629       .depthClamp = true,
630       .depthBiasClamp = true,
631       .fillModeNonSolid = true,
632       .depthBounds = true,
633       .wideLines = false,
634       .largePoints = true,
635       .alphaToOne = true,
636       .multiViewport = true,
637       .samplerAnisotropy = true,
638       .textureCompressionETC2 = true,
639       .textureCompressionASTC_LDR = true,
640       .textureCompressionBC = true,
641       .occlusionQueryPrecise = true,
642       .pipelineStatisticsQuery = true,
643       .vertexPipelineStoresAndAtomics = true,
644       .fragmentStoresAndAtomics = true,
645       .shaderTessellationAndGeometryPointSize = false,
646       .shaderImageGatherExtended = true,
647       .shaderStorageImageExtendedFormats = true,
648       .shaderStorageImageMultisample = false,
649       .shaderUniformBufferArrayDynamicIndexing = true,
650       .shaderSampledImageArrayDynamicIndexing = true,
651       .shaderStorageBufferArrayDynamicIndexing = true,
652       .shaderStorageImageArrayDynamicIndexing = true,
653       .shaderStorageImageReadWithoutFormat = true,
654       .shaderStorageImageWriteWithoutFormat = true,
655       .shaderClipDistance = true,
656       .shaderCullDistance = true,
657       .shaderFloat64 = false,
658       .shaderInt64 = false,
659       .shaderInt16 = true,
660       .sparseBinding = false,
661       .variableMultisampleRate = true,
662       .inheritedQueries = true,
663    };
664 
665    VkPhysicalDeviceVulkan11Features core_1_1 = {
666       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
667    };
668    tu_get_physical_device_features_1_1(pdevice, &core_1_1);
669 
670    VkPhysicalDeviceVulkan12Features core_1_2 = {
671       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
672    };
673    tu_get_physical_device_features_1_2(pdevice, &core_1_2);
674 
675    VkPhysicalDeviceVulkan13Features core_1_3 = {
676       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
677    };
678    tu_get_physical_device_features_1_3(pdevice, &core_1_3);
679 
680    vk_foreach_struct(ext, pFeatures->pNext)
681    {
682       if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
683          continue;
684       if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
685          continue;
686       if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
687          continue;
688 
689       switch (ext->sType) {
690       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
691          VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
692             (VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext;
693          features->conditionalRendering = true;
694          features->inheritedConditionalRendering = true;
695          break;
696       }
697       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
698          VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
699             (VkPhysicalDeviceTransformFeedbackFeaturesEXT *) ext;
700          features->transformFeedback = true;
701          features->geometryStreams = true;
702          break;
703       }
704       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
705          VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
706             (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
707          features->indexTypeUint8 = true;
708          break;
709       }
710       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
711          VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
712             (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
713          features->vertexAttributeInstanceRateDivisor = true;
714          features->vertexAttributeInstanceRateZeroDivisor = true;
715          break;
716       }
717       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
718          VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
719             (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
720          features->depthClipEnable = true;
721          break;
722       }
723       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
724          VkPhysicalDevice4444FormatsFeaturesEXT *features = (void *)ext;
725          features->formatA4R4G4B4 = true;
726          features->formatA4B4G4R4 = true;
727          break;
728       }
729       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
730          VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *) ext;
731          features->customBorderColors = true;
732          features->customBorderColorWithoutFormat = true;
733          break;
734       }
735       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
736          VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext;
737          features->extendedDynamicState = true;
738          break;
739       }
740       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
741          VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
742             (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
743          features->extendedDynamicState2 = true;
744          features->extendedDynamicState2LogicOp = false;
745          features->extendedDynamicState2PatchControlPoints = false;
746          break;
747       }
748       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
749          VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
750             (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
751          feature->performanceCounterQueryPools = true;
752          feature->performanceCounterMultipleQueryPools = false;
753          break;
754       }
755       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
756          VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
757             (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
758          features->pipelineExecutableInfo = true;
759          break;
760       }
761       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
762          VkPhysicalDeviceShaderFloat16Int8Features *features =
763             (VkPhysicalDeviceShaderFloat16Int8Features *) ext;
764          features->shaderFloat16 = true;
765          features->shaderInt8 = false;
766          break;
767       }
768       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
769          VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = (void *)ext;
770          features->scalarBlockLayout = true;
771          break;
772       }
773       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
774          VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
775          features->robustBufferAccess2 = true;
776          features->robustImageAccess2 = true;
777          features->nullDescriptor = true;
778          break;
779       }
780       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
781          VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
782             (VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
783          features->timelineSemaphore = true;
784          break;
785       }
786       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
787          VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
788             (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
789          features->provokingVertexLast = true;
790          features->transformFeedbackPreservesProvokingVertex = true;
791          break;
792       }
793       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
794          VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
795             (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
796          features->mutableDescriptorType = true;
797          break;
798       }
799       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
800          VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
801             (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
802          features->rectangularLines = true;
803          features->bresenhamLines = true;
804          features->smoothLines = false;
805          features->stippledRectangularLines = false;
806          features->stippledBresenhamLines = false;
807          features->stippledSmoothLines = false;
808          break;
809       }
810       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
811          VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
812             (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
813          features->primitiveTopologyListRestart = true;
814          features->primitiveTopologyPatchListRestart = false;
815          break;
816       }
817       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM: {
818          VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *features =
819             (VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *)ext;
820          features->rasterizationOrderColorAttachmentAccess = true;
821          features->rasterizationOrderDepthAttachmentAccess = true;
822          features->rasterizationOrderStencilAttachmentAccess = true;
823          break;
824       }
825       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
826          VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
827             (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
828          features->depthClipControl = true;
829          break;
830       }
831       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
832          VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
833             (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
834          features->texelBufferAlignment = true;
835          break;
836       }
837 
838       default:
839          break;
840       }
841    }
842 }
843 
844 
845 static void
tu_get_physical_device_properties_1_1(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan11Properties * p)846 tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
847                                        VkPhysicalDeviceVulkan11Properties *p)
848 {
849    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
850 
851    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
852    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
853    memset(p->deviceLUID, 0, VK_LUID_SIZE);
854    p->deviceNodeMask = 0;
855    p->deviceLUIDValid = false;
856 
857    p->subgroupSize = 128;
858    p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
859    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
860                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
861                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
862                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
863                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
864                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
865    if (pdevice->info->a6xx.has_getfiberid) {
866       p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
867       p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
868    }
869 
870    p->subgroupQuadOperationsInAllStages = false;
871 
872    p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
873    p->maxMultiviewViewCount = MAX_VIEWS;
874    p->maxMultiviewInstanceIndex = INT_MAX;
875    p->protectedNoFault = false;
876    /* Make sure everything is addressable by a signed 32-bit int, and
877     * our largest descriptors are 96 bytes.
878     */
879    p->maxPerSetDescriptors = (1ull << 31) / 96;
880    /* Our buffer size fields allow only this much */
881    p->maxMemoryAllocationSize = 0xFFFFFFFFull;
882 
883 }
884 
885 
886 /* I have no idea what the maximum size is, but the hardware supports very
887  * large numbers of descriptors (at least 2^16). This limit is based on
888  * CP_LOAD_STATE6, which has a 28-bit field for the DWORD offset, so that
889  * we don't have to think about what to do if that overflows, but really
890  * nothing is likely to get close to this.
891  */
892 static const size_t max_descriptor_set_size = (1 << 28) / A6XX_TEX_CONST_DWORDS;
893 static const VkSampleCountFlags sample_counts =
894    VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
895 
896 static void
tu_get_physical_device_properties_1_2(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan12Properties * p)897 tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
898                                        VkPhysicalDeviceVulkan12Properties *p)
899 {
900    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
901 
902    p->driverID = VK_DRIVER_ID_MESA_TURNIP;
903    memset(p->driverName, 0, sizeof(p->driverName));
904    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
905             "turnip Mesa driver");
906    memset(p->driverInfo, 0, sizeof(p->driverInfo));
907    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
908             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
909    p->conformanceVersion = (VkConformanceVersionKHR) {
910       .major = 1,
911       .minor = 2,
912       .subminor = 7,
913       .patch = 1,
914    };
915 
916    p->denormBehaviorIndependence =
917       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
918    p->roundingModeIndependence =
919       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
920 
921    p->shaderDenormFlushToZeroFloat16         = true;
922    p->shaderDenormPreserveFloat16            = false;
923    p->shaderRoundingModeRTEFloat16           = true;
924    p->shaderRoundingModeRTZFloat16           = false;
925    p->shaderSignedZeroInfNanPreserveFloat16  = true;
926 
927    p->shaderDenormFlushToZeroFloat32         = true;
928    p->shaderDenormPreserveFloat32            = false;
929    p->shaderRoundingModeRTEFloat32           = true;
930    p->shaderRoundingModeRTZFloat32           = false;
931    p->shaderSignedZeroInfNanPreserveFloat32  = true;
932 
933    p->shaderDenormFlushToZeroFloat64         = false;
934    p->shaderDenormPreserveFloat64            = false;
935    p->shaderRoundingModeRTEFloat64           = false;
936    p->shaderRoundingModeRTZFloat64           = false;
937    p->shaderSignedZeroInfNanPreserveFloat64  = false;
938 
939    p->shaderUniformBufferArrayNonUniformIndexingNative   = true;
940    p->shaderSampledImageArrayNonUniformIndexingNative    = true;
941    p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
942    p->shaderStorageImageArrayNonUniformIndexingNative    = true;
943    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
944    p->robustBufferAccessUpdateAfterBind                  = false;
945    p->quadDivergentImplicitLod                           = false;
946 
947    p->maxUpdateAfterBindDescriptorsInAllPools            = max_descriptor_set_size;
948    p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_descriptor_set_size;
949    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
950    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
951    p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_descriptor_set_size;
952    p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_descriptor_set_size;
953    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
954    p->maxPerStageUpdateAfterBindResources                = max_descriptor_set_size;
955    p->maxDescriptorSetUpdateAfterBindSamplers            = max_descriptor_set_size;
956    p->maxDescriptorSetUpdateAfterBindUniformBuffers      = max_descriptor_set_size;
957    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
958    p->maxDescriptorSetUpdateAfterBindStorageBuffers      = max_descriptor_set_size;
959    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
960    p->maxDescriptorSetUpdateAfterBindSampledImages       = max_descriptor_set_size;
961    p->maxDescriptorSetUpdateAfterBindStorageImages       = max_descriptor_set_size;
962    p->maxDescriptorSetUpdateAfterBindInputAttachments    = max_descriptor_set_size;
963 
964    p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
965    p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
966    p->independentResolveNone  = false;
967    p->independentResolve      = false;
968 
969    p->filterMinmaxSingleComponentFormats  = true;
970    p->filterMinmaxImageComponentMapping   = true;
971 
972    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
973 
974    p->framebufferIntegerColorSampleCounts = sample_counts;
975 }
976 
977 static void
tu_get_physical_device_properties_1_3(struct tu_physical_device * pdevice,VkPhysicalDeviceVulkan13Properties * p)978 tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
979                                        VkPhysicalDeviceVulkan13Properties *p)
980 {
981    /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
982    p->minSubgroupSize = 64; /* threadsize_base */
983    p->maxSubgroupSize = 128; /* threadsize_base * 2 */
984    p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
985    p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
986 
987    /* VK_EXT_inline_uniform_block is not implemented */
988    p->maxInlineUniformBlockSize = 0;
989    p->maxPerStageDescriptorInlineUniformBlocks = 0;
990    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 0;
991    p->maxDescriptorSetInlineUniformBlocks = 0;
992    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 0;
993    p->maxInlineUniformTotalSize = 0;
994 
995    p->integerDotProduct8BitUnsignedAccelerated = false;
996    p->integerDotProduct8BitSignedAccelerated = false;
997    p->integerDotProduct8BitMixedSignednessAccelerated = false;
998    p->integerDotProduct4x8BitPackedUnsignedAccelerated =
999       pdevice->info->a6xx.has_dp2acc;
1000    /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
1001    p->integerDotProduct4x8BitPackedSignedAccelerated = false;
1002    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
1003       pdevice->info->a6xx.has_dp2acc;
1004    p->integerDotProduct16BitUnsignedAccelerated = false;
1005    p->integerDotProduct16BitSignedAccelerated = false;
1006    p->integerDotProduct16BitMixedSignednessAccelerated = false;
1007    p->integerDotProduct32BitUnsignedAccelerated = false;
1008    p->integerDotProduct32BitSignedAccelerated = false;
1009    p->integerDotProduct32BitMixedSignednessAccelerated = false;
1010    p->integerDotProduct64BitUnsignedAccelerated = false;
1011    p->integerDotProduct64BitSignedAccelerated = false;
1012    p->integerDotProduct64BitMixedSignednessAccelerated = false;
1013    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
1014    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
1015    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
1016    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
1017       pdevice->info->a6xx.has_dp2acc;
1018    /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
1019    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
1020    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
1021       pdevice->info->a6xx.has_dp2acc;
1022    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
1023    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
1024    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1025    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1026    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1027    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1028    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1029    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1030    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1031 
1032    p->storageTexelBufferOffsetAlignmentBytes = 64;
1033    p->storageTexelBufferOffsetSingleTexelAlignment = false;
1034    p->uniformTexelBufferOffsetAlignmentBytes = 64;
1035    p->uniformTexelBufferOffsetSingleTexelAlignment = false;
1036 
1037    /* TODO: find out the limit */
1038    p->maxBufferSize = 0;
1039 }
1040 
1041 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceProperties2 * pProperties)1042 tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
1043                                 VkPhysicalDeviceProperties2 *pProperties)
1044 {
1045    TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1046 
1047    VkPhysicalDeviceLimits limits = {
1048       .maxImageDimension1D = (1 << 14),
1049       .maxImageDimension2D = (1 << 14),
1050       .maxImageDimension3D = (1 << 11),
1051       .maxImageDimensionCube = (1 << 14),
1052       .maxImageArrayLayers = (1 << 11),
1053       .maxTexelBufferElements = 128 * 1024 * 1024,
1054       .maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE,
1055       .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
1056       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1057       .maxMemoryAllocationCount = UINT32_MAX,
1058       .maxSamplerAllocationCount = 64 * 1024,
1059       .bufferImageGranularity = 64,          /* A cache line */
1060       .sparseAddressSpaceSize = 0,
1061       .maxBoundDescriptorSets = MAX_SETS,
1062       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1063       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1064       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1065       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1066       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1067       .maxPerStageDescriptorInputAttachments = MAX_RTS,
1068       .maxPerStageResources = max_descriptor_set_size,
1069       .maxDescriptorSetSamplers = max_descriptor_set_size,
1070       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1071       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1072       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1073       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1074       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1075       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1076       .maxDescriptorSetInputAttachments = MAX_RTS,
1077       .maxVertexInputAttributes = 32,
1078       .maxVertexInputBindings = 32,
1079       .maxVertexInputAttributeOffset = 4095,
1080       .maxVertexInputBindingStride = 2048,
1081       .maxVertexOutputComponents = 128,
1082       .maxTessellationGenerationLevel = 64,
1083       .maxTessellationPatchSize = 32,
1084       .maxTessellationControlPerVertexInputComponents = 128,
1085       .maxTessellationControlPerVertexOutputComponents = 128,
1086       .maxTessellationControlPerPatchOutputComponents = 120,
1087       .maxTessellationControlTotalOutputComponents = 4096,
1088       .maxTessellationEvaluationInputComponents = 128,
1089       .maxTessellationEvaluationOutputComponents = 128,
1090       .maxGeometryShaderInvocations = 32,
1091       .maxGeometryInputComponents = 64,
1092       .maxGeometryOutputComponents = 128,
1093       .maxGeometryOutputVertices = 256,
1094       .maxGeometryTotalOutputComponents = 1024,
1095       .maxFragmentInputComponents = 124,
1096       .maxFragmentOutputAttachments = 8,
1097       .maxFragmentDualSrcAttachments = 1,
1098       .maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2,
1099       .maxComputeSharedMemorySize = 32768,
1100       .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1101       .maxComputeWorkGroupInvocations = 2048,
1102       .maxComputeWorkGroupSize = { 1024, 1024, 1024 },
1103       .subPixelPrecisionBits = 8,
1104       .subTexelPrecisionBits = 8,
1105       .mipmapPrecisionBits = 8,
1106       .maxDrawIndexedIndexValue = UINT32_MAX,
1107       .maxDrawIndirectCount = UINT32_MAX,
1108       .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
1109       .maxSamplerAnisotropy = 16,
1110       .maxViewports = MAX_VIEWPORTS,
1111       .maxViewportDimensions = { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
1112       .viewportBoundsRange = { INT16_MIN, INT16_MAX },
1113       .viewportSubPixelBits = 8,
1114       .minMemoryMapAlignment = 4096, /* A page */
1115       .minTexelBufferOffsetAlignment = 64,
1116       .minUniformBufferOffsetAlignment = 64,
1117       .minStorageBufferOffsetAlignment = 64,
1118       .minTexelOffset = -16,
1119       .maxTexelOffset = 15,
1120       .minTexelGatherOffset = -32,
1121       .maxTexelGatherOffset = 31,
1122       .minInterpolationOffset = -0.5,
1123       .maxInterpolationOffset = 0.4375,
1124       .subPixelInterpolationOffsetBits = 4,
1125       .maxFramebufferWidth = (1 << 14),
1126       .maxFramebufferHeight = (1 << 14),
1127       .maxFramebufferLayers = (1 << 10),
1128       .framebufferColorSampleCounts = sample_counts,
1129       .framebufferDepthSampleCounts = sample_counts,
1130       .framebufferStencilSampleCounts = sample_counts,
1131       .framebufferNoAttachmentsSampleCounts = sample_counts,
1132       .maxColorAttachments = MAX_RTS,
1133       .sampledImageColorSampleCounts = sample_counts,
1134       .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1135       .sampledImageDepthSampleCounts = sample_counts,
1136       .sampledImageStencilSampleCounts = sample_counts,
1137       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1138       .maxSampleMaskWords = 1,
1139       .timestampComputeAndGraphics = true,
1140       .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
1141       .maxClipDistances = 8,
1142       .maxCullDistances = 8,
1143       .maxCombinedClipAndCullDistances = 8,
1144       .discreteQueuePriorities = 2,
1145       .pointSizeRange = { 1, 4092 },
1146       .lineWidthRange = { 1.0, 1.0 },
1147       .pointSizeGranularity = 	0.0625,
1148       .lineWidthGranularity = 0.0,
1149       .strictLines = true,
1150       .standardSampleLocations = true,
1151       .optimalBufferCopyOffsetAlignment = 128,
1152       .optimalBufferCopyRowPitchAlignment = 128,
1153       .nonCoherentAtomSize = 64,
1154    };
1155 
1156    pProperties->properties = (VkPhysicalDeviceProperties) {
1157       .apiVersion = TU_API_VERSION,
1158       .driverVersion = vk_get_driver_version(),
1159       .vendorID = 0x5143,
1160       .deviceID = pdevice->dev_id.chip_id,
1161       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1162       .limits = limits,
1163       .sparseProperties = { 0 },
1164    };
1165 
1166    strcpy(pProperties->properties.deviceName, pdevice->name);
1167    memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1168 
1169    VkPhysicalDeviceVulkan11Properties core_1_1 = {
1170       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
1171    };
1172    tu_get_physical_device_properties_1_1(pdevice, &core_1_1);
1173 
1174    VkPhysicalDeviceVulkan12Properties core_1_2 = {
1175       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
1176    };
1177    tu_get_physical_device_properties_1_2(pdevice, &core_1_2);
1178 
1179    VkPhysicalDeviceVulkan13Properties core_1_3 = {
1180       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
1181    };
1182    tu_get_physical_device_properties_1_3(pdevice, &core_1_3);
1183 
1184    vk_foreach_struct(ext, pProperties->pNext)
1185    {
1186       if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
1187          continue;
1188       if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
1189          continue;
1190       if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
1191          continue;
1192 
1193       switch (ext->sType) {
1194       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
1195          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
1196             (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
1197          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1198          break;
1199       }
1200       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
1201          VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
1202             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
1203 
1204          properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
1205          properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
1206          properties->maxTransformFeedbackBufferSize = UINT32_MAX;
1207          properties->maxTransformFeedbackStreamDataSize = 512;
1208          properties->maxTransformFeedbackBufferDataSize = 512;
1209          properties->maxTransformFeedbackBufferDataStride = 512;
1210          properties->transformFeedbackQueries = true;
1211          properties->transformFeedbackStreamsLinesTriangles = true;
1212          properties->transformFeedbackRasterizationStreamSelect = true;
1213          properties->transformFeedbackDraw = true;
1214          break;
1215       }
1216       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
1217          VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
1218             (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
1219          properties->sampleLocationSampleCounts = 0;
1220          if (pdevice->vk.supported_extensions.EXT_sample_locations) {
1221             properties->sampleLocationSampleCounts =
1222                VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
1223          }
1224          properties->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
1225          properties->sampleLocationCoordinateRange[0] = 0.0f;
1226          properties->sampleLocationCoordinateRange[1] = 0.9375f;
1227          properties->sampleLocationSubPixelBits = 4;
1228          properties->variableSampleLocations = true;
1229          break;
1230       }
1231       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
1232          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
1233             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
1234          props->maxVertexAttribDivisor = UINT32_MAX;
1235          break;
1236       }
1237       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
1238          VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (void *)ext;
1239          props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
1240          break;
1241       }
1242       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
1243          VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
1244             (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
1245          properties->allowCommandBufferQueryCopies = false;
1246          break;
1247       }
1248       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
1249          VkPhysicalDeviceRobustness2PropertiesEXT *props = (void *)ext;
1250          /* see write_buffer_descriptor() */
1251          props->robustStorageBufferAccessSizeAlignment = 4;
1252          /* see write_ubo_descriptor() */
1253          props->robustUniformBufferAccessSizeAlignment = 16;
1254          break;
1255       }
1256 
1257       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
1258          VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
1259             (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
1260          properties->provokingVertexModePerPipeline = true;
1261          properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1262          break;
1263       }
1264       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
1265          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
1266             (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
1267          props->lineSubPixelPrecisionBits = 8;
1268          break;
1269       }
1270       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
1271          VkPhysicalDeviceDrmPropertiesEXT *props =
1272             (VkPhysicalDeviceDrmPropertiesEXT *)ext;
1273          props->hasPrimary = pdevice->has_master;
1274          props->primaryMajor = pdevice->master_major;
1275          props->primaryMinor = pdevice->master_minor;
1276 
1277          props->hasRender = pdevice->has_local;
1278          props->renderMajor = pdevice->local_major;
1279          props->renderMinor = pdevice->local_minor;
1280          break;
1281       }
1282 
1283       default:
1284          break;
1285       }
1286    }
1287 }
1288 
1289 static const VkQueueFamilyProperties tu_queue_family_properties = {
1290    .queueFlags =
1291       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1292    .queueCount = 1,
1293    .timestampValidBits = 48,
1294    .minImageTransferGranularity = { 1, 1, 1 },
1295 };
1296 
1297 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1298 tu_GetPhysicalDeviceQueueFamilyProperties2(
1299    VkPhysicalDevice physicalDevice,
1300    uint32_t *pQueueFamilyPropertyCount,
1301    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1302 {
1303    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1304                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
1305 
1306    vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1307    {
1308       p->queueFamilyProperties = tu_queue_family_properties;
1309    }
1310 }
1311 
1312 uint64_t
tu_get_system_heap_size()1313 tu_get_system_heap_size()
1314 {
1315    struct sysinfo info;
1316    sysinfo(&info);
1317 
1318    uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;
1319 
1320    /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
1321     * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
1322     */
1323    uint64_t available_ram;
1324    if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1325       available_ram = total_ram / 2;
1326    else
1327       available_ram = total_ram * 3 / 4;
1328 
1329    return available_ram;
1330 }
1331 
1332 static VkDeviceSize
tu_get_budget_memory(struct tu_physical_device * physical_device)1333 tu_get_budget_memory(struct tu_physical_device *physical_device)
1334 {
1335    uint64_t heap_size = physical_device->heap.size;
1336    uint64_t heap_used = physical_device->heap.used;
1337    uint64_t sys_available;
1338    ASSERTED bool has_available_memory =
1339       os_get_available_system_memory(&sys_available);
1340    assert(has_available_memory);
1341 
1342    /*
1343     * Let's not incite the app to starve the system: report at most 90% of
1344     * available system memory.
1345     */
1346    uint64_t heap_available = sys_available * 9 / 10;
1347    return MIN2(heap_size, heap_used + heap_available);
1348 }
1349 
1350 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,VkPhysicalDeviceMemoryProperties2 * props2)1351 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1352                                       VkPhysicalDeviceMemoryProperties2 *props2)
1353 {
1354    TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1355 
1356    VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1357    props->memoryHeapCount = 1;
1358    props->memoryHeaps[0].size = physical_device->heap.size;
1359    props->memoryHeaps[0].flags = physical_device->heap.flags;
1360 
1361    props->memoryTypeCount = 1;
1362    props->memoryTypes[0].propertyFlags =
1363       VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1364       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1365       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1366    props->memoryTypes[0].heapIndex = 0;
1367 
1368    vk_foreach_struct(ext, props2->pNext)
1369    {
1370       switch (ext->sType) {
1371       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1372          VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1373             (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1374          memory_budget_props->heapUsage[0] = physical_device->heap.used;
1375          memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1376 
1377          /* The heapBudget and heapUsage values must be zero for array elements
1378           * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1379           */
1380          for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1381             memory_budget_props->heapBudget[i] = 0u;
1382             memory_budget_props->heapUsage[i] = 0u;
1383          }
1384          break;
1385       }
1386       default:
1387          break;
1388       }
1389    }
1390 }
1391 
1392 static VkResult
tu_queue_init(struct tu_device * device,struct tu_queue * queue,int idx,const VkDeviceQueueCreateInfo * create_info)1393 tu_queue_init(struct tu_device *device,
1394               struct tu_queue *queue,
1395               int idx,
1396               const VkDeviceQueueCreateInfo *create_info)
1397 {
1398    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
1399    if (result != VK_SUCCESS)
1400       return result;
1401 
1402    queue->device = device;
1403 #ifndef TU_USE_KGSL
1404    queue->vk.driver_submit = tu_queue_submit;
1405 #endif
1406 
1407    int ret = tu_drm_submitqueue_new(device, 0, &queue->msm_queue_id);
1408    if (ret)
1409       return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1410                                "submitqueue create failed");
1411 
1412    queue->fence = -1;
1413 
1414    return VK_SUCCESS;
1415 }
1416 
1417 static void
tu_queue_finish(struct tu_queue * queue)1418 tu_queue_finish(struct tu_queue *queue)
1419 {
1420    vk_queue_finish(&queue->vk);
1421    if (queue->fence >= 0)
1422       close(queue->fence);
1423    tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
1424 }
1425 
1426 uint64_t
tu_device_ticks_to_ns(struct tu_device * dev,uint64_t ts)1427 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1428 {
1429    /* This is based on the 19.2MHz always-on rbbm timer.
1430     *
1431     * TODO we should probably query this value from kernel..
1432     */
1433    return ts * (1000000000 / 19200000);
1434 }
1435 
1436 static void*
tu_trace_create_ts_buffer(struct u_trace_context * utctx,uint32_t size)1437 tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
1438 {
1439    struct tu_device *device =
1440       container_of(utctx, struct tu_device, trace_context);
1441 
1442    struct tu_bo *bo;
1443    tu_bo_init_new(device, &bo, size, false);
1444 
1445    return bo;
1446 }
1447 
1448 static void
tu_trace_destroy_ts_buffer(struct u_trace_context * utctx,void * timestamps)1449 tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
1450 {
1451    struct tu_device *device =
1452       container_of(utctx, struct tu_device, trace_context);
1453    struct tu_bo *bo = timestamps;
1454 
1455    tu_bo_finish(device, bo);
1456 }
1457 
1458 static void
tu_trace_record_ts(struct u_trace * ut,void * cs,void * timestamps,unsigned idx,bool end_of_pipe)1459 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1460                    unsigned idx, bool end_of_pipe)
1461 {
1462    struct tu_bo *bo = timestamps;
1463    struct tu_cs *ts_cs = cs;
1464 
1465    unsigned ts_offset = idx * sizeof(uint64_t);
1466    tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1467    tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
1468    tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
1469    tu_cs_emit(ts_cs, 0x00000000);
1470 }
1471 
1472 static uint64_t
tu_trace_read_ts(struct u_trace_context * utctx,void * timestamps,unsigned idx,void * flush_data)1473 tu_trace_read_ts(struct u_trace_context *utctx,
1474                  void *timestamps, unsigned idx, void *flush_data)
1475 {
1476    struct tu_device *device =
1477       container_of(utctx, struct tu_device, trace_context);
1478    struct tu_bo *bo = timestamps;
1479    struct tu_u_trace_submission_data *submission_data = flush_data;
1480 
1481    /* Only need to stall on results for the first entry: */
1482    if (idx == 0) {
1483       tu_device_wait_u_trace(device, submission_data->syncobj);
1484    }
1485 
1486    if (tu_bo_map(device, bo) != VK_SUCCESS) {
1487       return U_TRACE_NO_TIMESTAMP;
1488    }
1489 
1490    uint64_t *ts = bo->map;
1491 
1492    /* Don't translate the no-timestamp marker: */
1493    if (ts[idx] == U_TRACE_NO_TIMESTAMP)
1494       return U_TRACE_NO_TIMESTAMP;
1495 
1496    return tu_device_ticks_to_ns(device, ts[idx]);
1497 }
1498 
1499 static void
tu_trace_delete_flush_data(struct u_trace_context * utctx,void * flush_data)1500 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1501 {
1502    struct tu_device *device =
1503       container_of(utctx, struct tu_device, trace_context);
1504    struct tu_u_trace_submission_data *submission_data = flush_data;
1505 
1506    tu_u_trace_submission_data_finish(device, submission_data);
1507 }
1508 
1509 void
tu_copy_timestamp_buffer(struct u_trace_context * utctx,void * cmdstream,void * ts_from,uint32_t from_offset,void * ts_to,uint32_t to_offset,uint32_t count)1510 tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
1511                          void *ts_from, uint32_t from_offset,
1512                          void *ts_to, uint32_t to_offset,
1513                          uint32_t count)
1514 {
1515    struct tu_cs *cs = cmdstream;
1516    struct tu_bo *bo_from = ts_from;
1517    struct tu_bo *bo_to = ts_to;
1518 
1519    tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1520    tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
1521    tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
1522    tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
1523 }
1524 
1525 VkResult
tu_create_copy_timestamp_cs(struct tu_cmd_buffer * cmdbuf,struct tu_cs ** cs,struct u_trace ** trace_copy)1526 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
1527                             struct u_trace **trace_copy)
1528 {
1529    *cs = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct tu_cs), 8,
1530                    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1531 
1532    if (*cs == NULL) {
1533       return VK_ERROR_OUT_OF_HOST_MEMORY;
1534    }
1535 
1536    tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1537               list_length(&cmdbuf->trace.trace_chunks) * 6 + 3);
1538 
1539    tu_cs_begin(*cs);
1540 
1541    tu_cs_emit_wfi(*cs);
1542    tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1543 
1544    *trace_copy = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1545                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1546 
1547    if (*trace_copy == NULL) {
1548       return VK_ERROR_OUT_OF_HOST_MEMORY;
1549    }
1550 
1551    u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1552    u_trace_clone_append(u_trace_begin_iterator(&cmdbuf->trace),
1553                         u_trace_end_iterator(&cmdbuf->trace),
1554                         *trace_copy, *cs,
1555                         tu_copy_timestamp_buffer);
1556 
1557    tu_cs_emit_wfi(*cs);
1558 
1559    tu_cs_end(*cs);
1560 
1561    return VK_SUCCESS;
1562 }
1563 
1564 VkResult
tu_u_trace_submission_data_create(struct tu_device * device,struct tu_cmd_buffer ** cmd_buffers,uint32_t cmd_buffer_count,struct tu_u_trace_submission_data ** submission_data)1565 tu_u_trace_submission_data_create(
1566    struct tu_device *device,
1567    struct tu_cmd_buffer **cmd_buffers,
1568    uint32_t cmd_buffer_count,
1569    struct tu_u_trace_submission_data **submission_data)
1570 {
1571    *submission_data =
1572       vk_zalloc(&device->vk.alloc,
1573                 sizeof(struct tu_u_trace_submission_data), 8,
1574                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1575 
1576    if (!(*submission_data)) {
1577       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1578    }
1579 
1580    struct tu_u_trace_submission_data *data = *submission_data;
1581 
1582    data->cmd_trace_data =
1583       vk_zalloc(&device->vk.alloc,
1584                 cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
1585                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1586 
1587    if (!data->cmd_trace_data) {
1588       goto fail;
1589    }
1590 
1591    data->cmd_buffer_count = cmd_buffer_count;
1592    data->last_buffer_with_tracepoints = -1;
1593 
1594    for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
1595       struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
1596 
1597       if (!u_trace_has_points(&cmdbuf->trace))
1598          continue;
1599 
1600       data->last_buffer_with_tracepoints = i;
1601 
1602       if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
1603          /* A single command buffer could be submitted several times, but we
1604           * already baked timestamp iova addresses and trace points are
1605           * single-use. Therefor we have to copy trace points and create
1606           * a new timestamp buffer on every submit of reusable command buffer.
1607           */
1608          if (tu_create_copy_timestamp_cs(cmdbuf,
1609                &data->cmd_trace_data[i].timestamp_copy_cs,
1610                &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
1611             goto fail;
1612          }
1613 
1614          assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
1615       } else {
1616          data->cmd_trace_data[i].trace = &cmdbuf->trace;
1617       }
1618    }
1619 
1620    assert(data->last_buffer_with_tracepoints != -1);
1621 
1622    return VK_SUCCESS;
1623 
1624 fail:
1625    tu_u_trace_submission_data_finish(device, data);
1626    *submission_data = NULL;
1627 
1628    return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1629 }
1630 
1631 void
tu_u_trace_submission_data_finish(struct tu_device * device,struct tu_u_trace_submission_data * submission_data)1632 tu_u_trace_submission_data_finish(
1633    struct tu_device *device,
1634    struct tu_u_trace_submission_data *submission_data)
1635 {
1636    for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1637       /* Only if we had to create a copy of trace we should free it */
1638       struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
1639       if (cmd_data->timestamp_copy_cs) {
1640          tu_cs_finish(cmd_data->timestamp_copy_cs);
1641          vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
1642 
1643          u_trace_fini(cmd_data->trace);
1644          vk_free(&device->vk.alloc, cmd_data->trace);
1645       }
1646    }
1647 
1648    vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
1649    vk_free(&device->vk.alloc, submission_data->syncobj);
1650    vk_free(&device->vk.alloc, submission_data);
1651 }
1652 
1653 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)1654 tu_CreateDevice(VkPhysicalDevice physicalDevice,
1655                 const VkDeviceCreateInfo *pCreateInfo,
1656                 const VkAllocationCallbacks *pAllocator,
1657                 VkDevice *pDevice)
1658 {
1659    TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
1660    VkResult result;
1661    struct tu_device *device;
1662    bool custom_border_colors = false;
1663    bool perf_query_pools = false;
1664    bool robust_buffer_access2 = false;
1665 
1666    vk_foreach_struct_const(ext, pCreateInfo->pNext) {
1667       switch (ext->sType) {
1668       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1669          const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
1670          custom_border_colors = border_color_features->customBorderColors;
1671          break;
1672       }
1673       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
1674          const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
1675             (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
1676          perf_query_pools = feature->performanceCounterQueryPools;
1677          break;
1678       }
1679       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1680          VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
1681          robust_buffer_access2 = features->robustBufferAccess2;
1682          break;
1683       }
1684       default:
1685          break;
1686       }
1687    }
1688 
1689    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
1690                        sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1691    if (!device)
1692       return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
1693 
1694    struct vk_device_dispatch_table dispatch_table;
1695    vk_device_dispatch_table_from_entrypoints(
1696       &dispatch_table, &tu_device_entrypoints, true);
1697    vk_device_dispatch_table_from_entrypoints(
1698       &dispatch_table, &wsi_device_entrypoints, false);
1699 
1700    result = vk_device_init(&device->vk, &physical_device->vk,
1701                            &dispatch_table, pCreateInfo, pAllocator);
1702    if (result != VK_SUCCESS) {
1703       vk_free(&device->vk.alloc, device);
1704       return vk_startup_errorf(physical_device->instance, result,
1705                                "vk_device_init failed");
1706    }
1707 
1708    device->instance = physical_device->instance;
1709    device->physical_device = physical_device;
1710    device->fd = physical_device->local_fd;
1711    device->vk.check_status = tu_device_check_status;
1712 
1713    mtx_init(&device->bo_mutex, mtx_plain);
1714    u_rwlock_init(&device->dma_bo_lock);
1715    pthread_mutex_init(&device->submit_mutex, NULL);
1716 
1717 #ifndef TU_USE_KGSL
1718    vk_device_set_drm_fd(&device->vk, device->fd);
1719 #endif
1720 
1721    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1722       const VkDeviceQueueCreateInfo *queue_create =
1723          &pCreateInfo->pQueueCreateInfos[i];
1724       uint32_t qfi = queue_create->queueFamilyIndex;
1725       device->queues[qfi] = vk_alloc(
1726          &device->vk.alloc, queue_create->queueCount * sizeof(struct tu_queue),
1727          8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1728       if (!device->queues[qfi]) {
1729          result = vk_startup_errorf(physical_device->instance,
1730                                     VK_ERROR_OUT_OF_HOST_MEMORY,
1731                                     "OOM");
1732          goto fail_queues;
1733       }
1734 
1735       memset(device->queues[qfi], 0,
1736              queue_create->queueCount * sizeof(struct tu_queue));
1737 
1738       device->queue_count[qfi] = queue_create->queueCount;
1739 
1740       for (unsigned q = 0; q < queue_create->queueCount; q++) {
1741          result = tu_queue_init(device, &device->queues[qfi][q], q,
1742                                 queue_create);
1743          if (result != VK_SUCCESS)
1744             goto fail_queues;
1745       }
1746    }
1747 
1748    device->compiler =
1749       ir3_compiler_create(NULL, &physical_device->dev_id,
1750                           &(struct ir3_compiler_options) {
1751                               .robust_ubo_access = robust_buffer_access2,
1752                               .push_ubo_with_preamble = true,
1753                            });
1754    if (!device->compiler) {
1755       result = vk_startup_errorf(physical_device->instance,
1756                                  VK_ERROR_INITIALIZATION_FAILED,
1757                                  "failed to initialize ir3 compiler");
1758       goto fail_queues;
1759    }
1760 
1761    /* Initialize sparse array for refcounting imported BOs */
1762    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
1763 
1764    /* initial sizes, these will increase if there is overflow */
1765    device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
1766    device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
1767 
1768    uint32_t global_size = sizeof(struct tu6_global);
1769    if (custom_border_colors)
1770       global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
1771 
1772    result = tu_bo_init_new(device, &device->global_bo, global_size,
1773                            TU_BO_ALLOC_ALLOW_DUMP);
1774    if (result != VK_SUCCESS) {
1775       vk_startup_errorf(device->instance, result, "BO init");
1776       goto fail_global_bo;
1777    }
1778 
1779    result = tu_bo_map(device, device->global_bo);
1780    if (result != VK_SUCCESS) {
1781       vk_startup_errorf(device->instance, result, "BO map");
1782       goto fail_global_bo_map;
1783    }
1784 
1785    struct tu6_global *global = device->global_bo->map;
1786    tu_init_clear_blit_shaders(device);
1787    global->predicate = 0;
1788    tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK],
1789                          &(VkClearColorValue) {}, false);
1790    tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK],
1791                          &(VkClearColorValue) {}, true);
1792    tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK],
1793                          &(VkClearColorValue) { .float32[3] = 1.0f }, false);
1794    tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_OPAQUE_BLACK],
1795                          &(VkClearColorValue) { .int32[3] = 1 }, true);
1796    tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE],
1797                          &(VkClearColorValue) { .float32[0 ... 3] = 1.0f }, false);
1798    tu6_pack_border_color(&global->bcolor_builtin[VK_BORDER_COLOR_INT_OPAQUE_WHITE],
1799                          &(VkClearColorValue) { .int32[0 ... 3] = 1 }, true);
1800 
1801    /* initialize to ones so ffs can be used to find unused slots */
1802    BITSET_ONES(device->custom_border_color);
1803 
1804    VkPipelineCacheCreateInfo ci;
1805    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
1806    ci.pNext = NULL;
1807    ci.flags = 0;
1808    ci.pInitialData = NULL;
1809    ci.initialDataSize = 0;
1810    VkPipelineCache pc;
1811    result =
1812       tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc);
1813    if (result != VK_SUCCESS) {
1814       vk_startup_errorf(device->instance, result, "create pipeline cache failed");
1815       goto fail_pipeline_cache;
1816    }
1817 
1818    if (perf_query_pools) {
1819       /* Prepare command streams setting pass index to the PERF_CNTRS_REG
1820        * from 0 to 31. One of these will be picked up at cmd submit time
1821        * when the perf query is executed.
1822        */
1823       struct tu_cs *cs;
1824 
1825       if (!(device->perfcntrs_pass_cs = calloc(1, sizeof(struct tu_cs)))) {
1826          result = vk_startup_errorf(device->instance,
1827                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
1828          goto fail_perfcntrs_pass_alloc;
1829       }
1830 
1831       device->perfcntrs_pass_cs_entries = calloc(32, sizeof(struct tu_cs_entry));
1832       if (!device->perfcntrs_pass_cs_entries) {
1833          result = vk_startup_errorf(device->instance,
1834                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
1835          goto fail_perfcntrs_pass_entries_alloc;
1836       }
1837 
1838       cs = device->perfcntrs_pass_cs;
1839       tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96);
1840 
1841       for (unsigned i = 0; i < 32; i++) {
1842          struct tu_cs sub_cs;
1843 
1844          result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
1845          if (result != VK_SUCCESS) {
1846             vk_startup_errorf(device->instance, result,
1847                   "failed to allocate commands streams");
1848             goto fail_prepare_perfcntrs_pass_cs;
1849          }
1850 
1851          tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
1852          tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
1853 
1854          device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
1855       }
1856    }
1857 
1858    /* Initialize a condition variable for timeline semaphore */
1859    pthread_condattr_t condattr;
1860    if (pthread_condattr_init(&condattr) != 0) {
1861       result = vk_startup_errorf(physical_device->instance,
1862                                  VK_ERROR_INITIALIZATION_FAILED,
1863                                  "pthread condattr init");
1864       goto fail_timeline_cond;
1865    }
1866    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
1867       pthread_condattr_destroy(&condattr);
1868       result = vk_startup_errorf(physical_device->instance,
1869                                  VK_ERROR_INITIALIZATION_FAILED,
1870                                  "pthread condattr clock setup");
1871       goto fail_timeline_cond;
1872    }
1873    if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
1874       pthread_condattr_destroy(&condattr);
1875       result = vk_startup_errorf(physical_device->instance,
1876                                  VK_ERROR_INITIALIZATION_FAILED,
1877                                  "pthread cond init");
1878       goto fail_timeline_cond;
1879    }
1880    pthread_condattr_destroy(&condattr);
1881 
1882    device->mem_cache = tu_pipeline_cache_from_handle(pc);
1883 
1884    result = tu_autotune_init(&device->autotune, device);
1885    if (result != VK_SUCCESS) {
1886       goto fail_timeline_cond;
1887    }
1888 
1889    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
1890       mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
1891 
1892    mtx_init(&device->mutex, mtx_plain);
1893 
1894    device->submit_count = 0;
1895    u_trace_context_init(&device->trace_context, device,
1896                      tu_trace_create_ts_buffer,
1897                      tu_trace_destroy_ts_buffer,
1898                      tu_trace_record_ts,
1899                      tu_trace_read_ts,
1900                      tu_trace_delete_flush_data);
1901 
1902    *pDevice = tu_device_to_handle(device);
1903    return VK_SUCCESS;
1904 
1905 fail_timeline_cond:
1906 fail_prepare_perfcntrs_pass_cs:
1907    free(device->perfcntrs_pass_cs_entries);
1908    tu_cs_finish(device->perfcntrs_pass_cs);
1909 fail_perfcntrs_pass_entries_alloc:
1910    free(device->perfcntrs_pass_cs);
1911 fail_perfcntrs_pass_alloc:
1912    tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
1913 fail_pipeline_cache:
1914    tu_destroy_clear_blit_shaders(device);
1915 fail_global_bo_map:
1916    tu_bo_finish(device, device->global_bo);
1917    vk_free(&device->vk.alloc, device->bo_list);
1918 fail_global_bo:
1919    ir3_compiler_destroy(device->compiler);
1920    util_sparse_array_finish(&device->bo_map);
1921 
1922 fail_queues:
1923    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1924       for (unsigned q = 0; q < device->queue_count[i]; q++)
1925          tu_queue_finish(&device->queues[i][q]);
1926       if (device->queue_count[i])
1927          vk_free(&device->vk.alloc, device->queues[i]);
1928    }
1929 
1930    u_rwlock_destroy(&device->dma_bo_lock);
1931    vk_device_finish(&device->vk);
1932    vk_free(&device->vk.alloc, device);
1933    return result;
1934 }
1935 
1936 VKAPI_ATTR void VKAPI_CALL
tu_DestroyDevice(VkDevice _device,const VkAllocationCallbacks * pAllocator)1937 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
1938 {
1939    TU_FROM_HANDLE(tu_device, device, _device);
1940 
1941    if (!device)
1942       return;
1943 
1944    u_trace_context_fini(&device->trace_context);
1945 
1946    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1947       for (unsigned q = 0; q < device->queue_count[i]; q++)
1948          tu_queue_finish(&device->queues[i][q]);
1949       if (device->queue_count[i])
1950          vk_free(&device->vk.alloc, device->queues[i]);
1951    }
1952 
1953    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
1954       if (device->scratch_bos[i].initialized)
1955          tu_bo_finish(device, device->scratch_bos[i].bo);
1956    }
1957 
1958    tu_destroy_clear_blit_shaders(device);
1959 
1960    ir3_compiler_destroy(device->compiler);
1961 
1962    VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache);
1963    tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL);
1964 
1965    if (device->perfcntrs_pass_cs) {
1966       free(device->perfcntrs_pass_cs_entries);
1967       tu_cs_finish(device->perfcntrs_pass_cs);
1968       free(device->perfcntrs_pass_cs);
1969    }
1970 
1971    tu_autotune_fini(&device->autotune, device);
1972 
1973    util_sparse_array_finish(&device->bo_map);
1974    u_rwlock_destroy(&device->dma_bo_lock);
1975 
1976    pthread_cond_destroy(&device->timeline_cond);
1977    vk_free(&device->vk.alloc, device->bo_list);
1978    vk_device_finish(&device->vk);
1979    vk_free(&device->vk.alloc, device);
1980 }
1981 
1982 VkResult
tu_get_scratch_bo(struct tu_device * dev,uint64_t size,struct tu_bo ** bo)1983 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
1984 {
1985    unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
1986    unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
1987    assert(index < ARRAY_SIZE(dev->scratch_bos));
1988 
1989    for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
1990       if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
1991          /* Fast path: just return the already-allocated BO. */
1992          *bo = dev->scratch_bos[i].bo;
1993          return VK_SUCCESS;
1994       }
1995    }
1996 
1997    /* Slow path: actually allocate the BO. We take a lock because the process
1998     * of allocating it is slow, and we don't want to block the CPU while it
1999     * finishes.
2000    */
2001    mtx_lock(&dev->scratch_bos[index].construct_mtx);
2002 
2003    /* Another thread may have allocated it already while we were waiting on
2004     * the lock. We need to check this in order to avoid double-allocating.
2005     */
2006    if (dev->scratch_bos[index].initialized) {
2007       mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2008       *bo = dev->scratch_bos[index].bo;
2009       return VK_SUCCESS;
2010    }
2011 
2012    unsigned bo_size = 1ull << size_log2;
2013    VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
2014                                     TU_BO_ALLOC_NO_FLAGS);
2015    if (result != VK_SUCCESS) {
2016       mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2017       return result;
2018    }
2019 
2020    p_atomic_set(&dev->scratch_bos[index].initialized, true);
2021 
2022    mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2023 
2024    *bo = dev->scratch_bos[index].bo;
2025    return VK_SUCCESS;
2026 }
2027 
2028 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceLayerProperties(uint32_t * pPropertyCount,VkLayerProperties * pProperties)2029 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2030                                     VkLayerProperties *pProperties)
2031 {
2032    *pPropertyCount = 0;
2033    return VK_SUCCESS;
2034 }
2035 
2036 /* Only used for kgsl since drm started using common implementation */
2037 #ifdef TU_USE_KGSL
2038 VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueWaitIdle(VkQueue _queue)2039 tu_QueueWaitIdle(VkQueue _queue)
2040 {
2041    TU_FROM_HANDLE(tu_queue, queue, _queue);
2042 
2043    if (vk_device_is_lost(&queue->device->vk))
2044       return VK_ERROR_DEVICE_LOST;
2045 
2046    if (queue->fence < 0)
2047       return VK_SUCCESS;
2048 
2049    struct pollfd fds = { .fd = queue->fence, .events = POLLIN };
2050    int ret;
2051    do {
2052       ret = poll(&fds, 1, -1);
2053    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2054 
2055    /* TODO: otherwise set device lost ? */
2056    assert(ret == 1 && !(fds.revents & (POLLERR | POLLNVAL)));
2057 
2058    close(queue->fence);
2059    queue->fence = -1;
2060    return VK_SUCCESS;
2061 }
2062 #endif
2063 
2064 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char * pLayerName,uint32_t * pPropertyCount,VkExtensionProperties * pProperties)2065 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2066                                         uint32_t *pPropertyCount,
2067                                         VkExtensionProperties *pProperties)
2068 {
2069    if (pLayerName)
2070       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2071 
2072    return vk_enumerate_instance_extension_properties(
2073       &tu_instance_extensions_supported, pPropertyCount, pProperties);
2074 }
2075 
2076 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance,const char * pName)2077 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2078 {
2079    TU_FROM_HANDLE(tu_instance, instance, _instance);
2080    return vk_instance_get_proc_addr(&instance->vk,
2081                                     &tu_instance_entrypoints,
2082                                     pName);
2083 }
2084 
2085 /* The loader wants us to expose a second GetInstanceProcAddr function
2086  * to work around certain LD_PRELOAD issues seen in apps.
2087  */
2088 PUBLIC
2089 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2090 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName);
2091 
2092 PUBLIC
2093 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance,const char * pName)2094 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
2095 {
2096    return tu_GetInstanceProcAddr(instance, pName);
2097 }
2098 
2099 /* With version 4+ of the loader interface the ICD should expose
2100  * vk_icdGetPhysicalDeviceProcAddr()
2101  */
2102 PUBLIC
2103 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2104 vk_icdGetPhysicalDeviceProcAddr(VkInstance  _instance,
2105                                 const char* pName);
2106 
2107 PFN_vkVoidFunction
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,const char * pName)2108 vk_icdGetPhysicalDeviceProcAddr(VkInstance  _instance,
2109                                 const char* pName)
2110 {
2111    TU_FROM_HANDLE(tu_instance, instance, _instance);
2112 
2113    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2114 }
2115 
2116 VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,const VkMemoryAllocateInfo * pAllocateInfo,const VkAllocationCallbacks * pAllocator,VkDeviceMemory * pMem)2117 tu_AllocateMemory(VkDevice _device,
2118                   const VkMemoryAllocateInfo *pAllocateInfo,
2119                   const VkAllocationCallbacks *pAllocator,
2120                   VkDeviceMemory *pMem)
2121 {
2122    TU_FROM_HANDLE(tu_device, device, _device);
2123    struct tu_device_memory *mem;
2124    VkResult result;
2125 
2126    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2127 
2128    if (pAllocateInfo->allocationSize == 0) {
2129       /* Apparently, this is allowed */
2130       *pMem = VK_NULL_HANDLE;
2131       return VK_SUCCESS;
2132    }
2133 
2134    struct tu_memory_heap *mem_heap = &device->physical_device->heap;
2135    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2136    if (mem_heap_used > mem_heap->size)
2137       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2138 
2139    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
2140                          VK_OBJECT_TYPE_DEVICE_MEMORY);
2141    if (mem == NULL)
2142       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2143 
2144    const VkImportMemoryFdInfoKHR *fd_info =
2145       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2146    if (fd_info && !fd_info->handleType)
2147       fd_info = NULL;
2148 
2149    if (fd_info) {
2150       assert(fd_info->handleType ==
2151                 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2152              fd_info->handleType ==
2153                 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2154 
2155       /*
2156        * TODO Importing the same fd twice gives us the same handle without
2157        * reference counting.  We need to maintain a per-instance handle-to-bo
2158        * table and add reference count to tu_bo.
2159        */
2160       result = tu_bo_init_dmabuf(device, &mem->bo,
2161                                  pAllocateInfo->allocationSize, fd_info->fd);
2162       if (result == VK_SUCCESS) {
2163          /* take ownership and close the fd */
2164          close(fd_info->fd);
2165       }
2166    } else {
2167       result =
2168          tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize,
2169                         TU_BO_ALLOC_NO_FLAGS);
2170    }
2171 
2172 
2173    if (result == VK_SUCCESS) {
2174       mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
2175       if (mem_heap_used > mem_heap->size) {
2176          p_atomic_add(&mem_heap->used, -mem->bo->size);
2177          tu_bo_finish(device, mem->bo);
2178          result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
2179                             "Out of heap memory");
2180       }
2181    }
2182 
2183    if (result != VK_SUCCESS) {
2184       vk_object_free(&device->vk, pAllocator, mem);
2185       return result;
2186    }
2187 
2188    /* Track in the device whether our BO list contains any implicit-sync BOs, so
2189     * we can suppress implicit sync on non-WSI usage.
2190     */
2191    const struct wsi_memory_allocate_info *wsi_info =
2192       vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2193    if (wsi_info && wsi_info->implicit_sync) {
2194       mtx_lock(&device->bo_mutex);
2195       if (!mem->bo->implicit_sync) {
2196          mem->bo->implicit_sync = true;
2197          device->implicit_sync_bo_count++;
2198       }
2199       mtx_unlock(&device->bo_mutex);
2200    }
2201 
2202    *pMem = tu_device_memory_to_handle(mem);
2203 
2204    return VK_SUCCESS;
2205 }
2206 
2207 VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,VkDeviceMemory _mem,const VkAllocationCallbacks * pAllocator)2208 tu_FreeMemory(VkDevice _device,
2209               VkDeviceMemory _mem,
2210               const VkAllocationCallbacks *pAllocator)
2211 {
2212    TU_FROM_HANDLE(tu_device, device, _device);
2213    TU_FROM_HANDLE(tu_device_memory, mem, _mem);
2214 
2215    if (mem == NULL)
2216       return;
2217 
2218    p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
2219    tu_bo_finish(device, mem->bo);
2220    vk_object_free(&device->vk, pAllocator, mem);
2221 }
2222 
2223 VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory(VkDevice _device,VkDeviceMemory _memory,VkDeviceSize offset,VkDeviceSize size,VkMemoryMapFlags flags,void ** ppData)2224 tu_MapMemory(VkDevice _device,
2225              VkDeviceMemory _memory,
2226              VkDeviceSize offset,
2227              VkDeviceSize size,
2228              VkMemoryMapFlags flags,
2229              void **ppData)
2230 {
2231    TU_FROM_HANDLE(tu_device, device, _device);
2232    TU_FROM_HANDLE(tu_device_memory, mem, _memory);
2233    VkResult result;
2234 
2235    if (mem == NULL) {
2236       *ppData = NULL;
2237       return VK_SUCCESS;
2238    }
2239 
2240    if (!mem->bo->map) {
2241       result = tu_bo_map(device, mem->bo);
2242       if (result != VK_SUCCESS)
2243          return result;
2244    }
2245 
2246    *ppData = mem->bo->map + offset;
2247    return VK_SUCCESS;
2248 }
2249 
2250 VKAPI_ATTR void VKAPI_CALL
tu_UnmapMemory(VkDevice _device,VkDeviceMemory _memory)2251 tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
2252 {
2253    /* TODO: unmap here instead of waiting for FreeMemory */
2254 }
2255 
2256 VKAPI_ATTR VkResult VKAPI_CALL
tu_FlushMappedMemoryRanges(VkDevice _device,uint32_t memoryRangeCount,const VkMappedMemoryRange * pMemoryRanges)2257 tu_FlushMappedMemoryRanges(VkDevice _device,
2258                            uint32_t memoryRangeCount,
2259                            const VkMappedMemoryRange *pMemoryRanges)
2260 {
2261    return VK_SUCCESS;
2262 }
2263 
2264 VKAPI_ATTR VkResult VKAPI_CALL
tu_InvalidateMappedMemoryRanges(VkDevice _device,uint32_t memoryRangeCount,const VkMappedMemoryRange * pMemoryRanges)2265 tu_InvalidateMappedMemoryRanges(VkDevice _device,
2266                                 uint32_t memoryRangeCount,
2267                                 const VkMappedMemoryRange *pMemoryRanges)
2268 {
2269    return VK_SUCCESS;
2270 }
2271 
2272 VKAPI_ATTR void VKAPI_CALL
tu_GetBufferMemoryRequirements2(VkDevice device,const VkBufferMemoryRequirementsInfo2 * pInfo,VkMemoryRequirements2 * pMemoryRequirements)2273 tu_GetBufferMemoryRequirements2(
2274    VkDevice device,
2275    const VkBufferMemoryRequirementsInfo2 *pInfo,
2276    VkMemoryRequirements2 *pMemoryRequirements)
2277 {
2278    TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2279 
2280    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2281       .memoryTypeBits = 1,
2282       .alignment = 64,
2283       .size = MAX2(align64(buffer->size, 64), buffer->size),
2284    };
2285 
2286    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2287       switch (ext->sType) {
2288       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2289          VkMemoryDedicatedRequirements *req =
2290             (VkMemoryDedicatedRequirements *) ext;
2291          req->requiresDedicatedAllocation = false;
2292          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2293          break;
2294       }
2295       default:
2296          break;
2297       }
2298    }
2299 }
2300 
2301 VKAPI_ATTR void VKAPI_CALL
tu_GetImageMemoryRequirements2(VkDevice device,const VkImageMemoryRequirementsInfo2 * pInfo,VkMemoryRequirements2 * pMemoryRequirements)2302 tu_GetImageMemoryRequirements2(VkDevice device,
2303                                const VkImageMemoryRequirementsInfo2 *pInfo,
2304                                VkMemoryRequirements2 *pMemoryRequirements)
2305 {
2306    TU_FROM_HANDLE(tu_image, image, pInfo->image);
2307 
2308    pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2309       .memoryTypeBits = 1,
2310       .alignment = image->layout[0].base_align,
2311       .size = image->total_size
2312    };
2313 
2314    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2315       switch (ext->sType) {
2316       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2317          VkMemoryDedicatedRequirements *req =
2318             (VkMemoryDedicatedRequirements *) ext;
2319          req->requiresDedicatedAllocation = image->shareable;
2320          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2321          break;
2322       }
2323       default:
2324          break;
2325       }
2326    }
2327 }
2328 
2329 VKAPI_ATTR void VKAPI_CALL
tu_GetImageSparseMemoryRequirements2(VkDevice device,const VkImageSparseMemoryRequirementsInfo2 * pInfo,uint32_t * pSparseMemoryRequirementCount,VkSparseImageMemoryRequirements2 * pSparseMemoryRequirements)2330 tu_GetImageSparseMemoryRequirements2(
2331    VkDevice device,
2332    const VkImageSparseMemoryRequirementsInfo2 *pInfo,
2333    uint32_t *pSparseMemoryRequirementCount,
2334    VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
2335 {
2336    tu_stub();
2337 }
2338 
2339 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,VkDeviceMemory memory,VkDeviceSize * pCommittedMemoryInBytes)2340 tu_GetDeviceMemoryCommitment(VkDevice device,
2341                              VkDeviceMemory memory,
2342                              VkDeviceSize *pCommittedMemoryInBytes)
2343 {
2344    *pCommittedMemoryInBytes = 0;
2345 }
2346 
2347 VKAPI_ATTR VkResult VKAPI_CALL
tu_BindBufferMemory2(VkDevice device,uint32_t bindInfoCount,const VkBindBufferMemoryInfo * pBindInfos)2348 tu_BindBufferMemory2(VkDevice device,
2349                      uint32_t bindInfoCount,
2350                      const VkBindBufferMemoryInfo *pBindInfos)
2351 {
2352    for (uint32_t i = 0; i < bindInfoCount; ++i) {
2353       TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2354       TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
2355 
2356       if (mem) {
2357          buffer->bo = mem->bo;
2358          buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2359       } else {
2360          buffer->bo = NULL;
2361       }
2362    }
2363    return VK_SUCCESS;
2364 }
2365 
2366 VKAPI_ATTR VkResult VKAPI_CALL
tu_BindImageMemory2(VkDevice device,uint32_t bindInfoCount,const VkBindImageMemoryInfo * pBindInfos)2367 tu_BindImageMemory2(VkDevice device,
2368                     uint32_t bindInfoCount,
2369                     const VkBindImageMemoryInfo *pBindInfos)
2370 {
2371    for (uint32_t i = 0; i < bindInfoCount; ++i) {
2372       TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
2373       TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2374 
2375       if (mem) {
2376          image->bo = mem->bo;
2377          image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2378       } else {
2379          image->bo = NULL;
2380          image->iova = 0;
2381       }
2382    }
2383 
2384    return VK_SUCCESS;
2385 }
2386 
2387 VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueBindSparse(VkQueue _queue,uint32_t bindInfoCount,const VkBindSparseInfo * pBindInfo,VkFence _fence)2388 tu_QueueBindSparse(VkQueue _queue,
2389                    uint32_t bindInfoCount,
2390                    const VkBindSparseInfo *pBindInfo,
2391                    VkFence _fence)
2392 {
2393    return VK_SUCCESS;
2394 }
2395 
2396 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateEvent(VkDevice _device,const VkEventCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkEvent * pEvent)2397 tu_CreateEvent(VkDevice _device,
2398                const VkEventCreateInfo *pCreateInfo,
2399                const VkAllocationCallbacks *pAllocator,
2400                VkEvent *pEvent)
2401 {
2402    TU_FROM_HANDLE(tu_device, device, _device);
2403 
2404    struct tu_event *event =
2405          vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
2406                          VK_OBJECT_TYPE_EVENT);
2407    if (!event)
2408       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2409 
2410    VkResult result = tu_bo_init_new(device, &event->bo, 0x1000,
2411                                     TU_BO_ALLOC_NO_FLAGS);
2412    if (result != VK_SUCCESS)
2413       goto fail_alloc;
2414 
2415    result = tu_bo_map(device, event->bo);
2416    if (result != VK_SUCCESS)
2417       goto fail_map;
2418 
2419    *pEvent = tu_event_to_handle(event);
2420 
2421    return VK_SUCCESS;
2422 
2423 fail_map:
2424    tu_bo_finish(device, event->bo);
2425 fail_alloc:
2426    vk_object_free(&device->vk, pAllocator, event);
2427    return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2428 }
2429 
2430 VKAPI_ATTR void VKAPI_CALL
tu_DestroyEvent(VkDevice _device,VkEvent _event,const VkAllocationCallbacks * pAllocator)2431 tu_DestroyEvent(VkDevice _device,
2432                 VkEvent _event,
2433                 const VkAllocationCallbacks *pAllocator)
2434 {
2435    TU_FROM_HANDLE(tu_device, device, _device);
2436    TU_FROM_HANDLE(tu_event, event, _event);
2437 
2438    if (!event)
2439       return;
2440 
2441    tu_bo_finish(device, event->bo);
2442    vk_object_free(&device->vk, pAllocator, event);
2443 }
2444 
2445 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetEventStatus(VkDevice _device,VkEvent _event)2446 tu_GetEventStatus(VkDevice _device, VkEvent _event)
2447 {
2448    TU_FROM_HANDLE(tu_event, event, _event);
2449 
2450    if (*(uint64_t*) event->bo->map == 1)
2451       return VK_EVENT_SET;
2452    return VK_EVENT_RESET;
2453 }
2454 
2455 VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device,VkEvent _event)2456 tu_SetEvent(VkDevice _device, VkEvent _event)
2457 {
2458    TU_FROM_HANDLE(tu_event, event, _event);
2459    *(uint64_t*) event->bo->map = 1;
2460 
2461    return VK_SUCCESS;
2462 }
2463 
2464 VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device,VkEvent _event)2465 tu_ResetEvent(VkDevice _device, VkEvent _event)
2466 {
2467    TU_FROM_HANDLE(tu_event, event, _event);
2468    *(uint64_t*) event->bo->map = 0;
2469 
2470    return VK_SUCCESS;
2471 }
2472 
2473 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBuffer(VkDevice _device,const VkBufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBuffer * pBuffer)2474 tu_CreateBuffer(VkDevice _device,
2475                 const VkBufferCreateInfo *pCreateInfo,
2476                 const VkAllocationCallbacks *pAllocator,
2477                 VkBuffer *pBuffer)
2478 {
2479    TU_FROM_HANDLE(tu_device, device, _device);
2480    struct tu_buffer *buffer;
2481 
2482    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2483 
2484    buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
2485                             VK_OBJECT_TYPE_BUFFER);
2486    if (buffer == NULL)
2487       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2488 
2489    buffer->size = pCreateInfo->size;
2490    buffer->usage = pCreateInfo->usage;
2491    buffer->flags = pCreateInfo->flags;
2492 
2493    *pBuffer = tu_buffer_to_handle(buffer);
2494 
2495    return VK_SUCCESS;
2496 }
2497 
2498 VKAPI_ATTR void VKAPI_CALL
tu_DestroyBuffer(VkDevice _device,VkBuffer _buffer,const VkAllocationCallbacks * pAllocator)2499 tu_DestroyBuffer(VkDevice _device,
2500                  VkBuffer _buffer,
2501                  const VkAllocationCallbacks *pAllocator)
2502 {
2503    TU_FROM_HANDLE(tu_device, device, _device);
2504    TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
2505 
2506    if (!buffer)
2507       return;
2508 
2509    vk_object_free(&device->vk, pAllocator, buffer);
2510 }
2511 
2512 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,const VkFramebufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFramebuffer * pFramebuffer)2513 tu_CreateFramebuffer(VkDevice _device,
2514                      const VkFramebufferCreateInfo *pCreateInfo,
2515                      const VkAllocationCallbacks *pAllocator,
2516                      VkFramebuffer *pFramebuffer)
2517 {
2518    TU_FROM_HANDLE(tu_device, device, _device);
2519    TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
2520    struct tu_framebuffer *framebuffer;
2521 
2522    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2523 
2524    bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
2525 
2526    size_t size = sizeof(*framebuffer);
2527    if (!imageless)
2528       size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
2529    framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
2530                                  VK_OBJECT_TYPE_FRAMEBUFFER);
2531    if (framebuffer == NULL)
2532       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2533 
2534    framebuffer->attachment_count = pCreateInfo->attachmentCount;
2535    framebuffer->width = pCreateInfo->width;
2536    framebuffer->height = pCreateInfo->height;
2537    framebuffer->layers = pCreateInfo->layers;
2538 
2539    if (!imageless) {
2540       for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2541          VkImageView _iview = pCreateInfo->pAttachments[i];
2542          struct tu_image_view *iview = tu_image_view_from_handle(_iview);
2543          framebuffer->attachments[i].attachment = iview;
2544       }
2545    }
2546 
2547    tu_framebuffer_tiling_config(framebuffer, device, pass);
2548 
2549    *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
2550    return VK_SUCCESS;
2551 }
2552 
2553 VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,VkFramebuffer _fb,const VkAllocationCallbacks * pAllocator)2554 tu_DestroyFramebuffer(VkDevice _device,
2555                       VkFramebuffer _fb,
2556                       const VkAllocationCallbacks *pAllocator)
2557 {
2558    TU_FROM_HANDLE(tu_device, device, _device);
2559    TU_FROM_HANDLE(tu_framebuffer, fb, _fb);
2560 
2561    if (!fb)
2562       return;
2563 
2564    vk_object_free(&device->vk, pAllocator, fb);
2565 }
2566 
2567 static void
tu_init_sampler(struct tu_device * device,struct tu_sampler * sampler,const VkSamplerCreateInfo * pCreateInfo)2568 tu_init_sampler(struct tu_device *device,
2569                 struct tu_sampler *sampler,
2570                 const VkSamplerCreateInfo *pCreateInfo)
2571 {
2572    const struct VkSamplerReductionModeCreateInfo *reduction =
2573       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
2574    const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
2575       vk_find_struct_const(pCreateInfo->pNext,  SAMPLER_YCBCR_CONVERSION_INFO);
2576    const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
2577       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
2578    /* for non-custom border colors, the VK enum is translated directly to an offset in
2579     * the border color buffer. custom border colors are located immediately after the
2580     * builtin colors, and thus an offset of TU_BORDER_COLOR_BUILTIN is added.
2581     */
2582    uint32_t border_color = (unsigned) pCreateInfo->borderColor;
2583    if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
2584        pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
2585       mtx_lock(&device->mutex);
2586       border_color = BITSET_FFS(device->custom_border_color);
2587       BITSET_CLEAR(device->custom_border_color, border_color);
2588       mtx_unlock(&device->mutex);
2589       tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
2590                             &custom_border_color->customBorderColor,
2591                             pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
2592       border_color += TU_BORDER_COLOR_BUILTIN;
2593    }
2594 
2595    unsigned aniso = pCreateInfo->anisotropyEnable ?
2596       util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
2597    bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
2598    float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
2599    float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);
2600 
2601    sampler->descriptor[0] =
2602       COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
2603       A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
2604       A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
2605       A6XX_TEX_SAMP_0_ANISO(aniso) |
2606       A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
2607       A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
2608       A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
2609       A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
2610    sampler->descriptor[1] =
2611       /* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
2612       COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
2613       A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
2614       A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
2615       COND(pCreateInfo->compareEnable,
2616            A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
2617    sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color);
2618    sampler->descriptor[3] = 0;
2619 
2620    if (reduction) {
2621       sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
2622          tu6_reduction_mode(reduction->reductionMode));
2623    }
2624 
2625    sampler->ycbcr_sampler = ycbcr_conversion ?
2626       tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
2627 
2628    if (sampler->ycbcr_sampler &&
2629        sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
2630       sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
2631    }
2632 
2633    /* TODO:
2634     * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
2635     */
2636 }
2637 
2638 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSampler(VkDevice _device,const VkSamplerCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSampler * pSampler)2639 tu_CreateSampler(VkDevice _device,
2640                  const VkSamplerCreateInfo *pCreateInfo,
2641                  const VkAllocationCallbacks *pAllocator,
2642                  VkSampler *pSampler)
2643 {
2644    TU_FROM_HANDLE(tu_device, device, _device);
2645    struct tu_sampler *sampler;
2646 
2647    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2648 
2649    sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
2650                              VK_OBJECT_TYPE_SAMPLER);
2651    if (!sampler)
2652       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2653 
2654    tu_init_sampler(device, sampler, pCreateInfo);
2655    *pSampler = tu_sampler_to_handle(sampler);
2656 
2657    return VK_SUCCESS;
2658 }
2659 
2660 VKAPI_ATTR void VKAPI_CALL
tu_DestroySampler(VkDevice _device,VkSampler _sampler,const VkAllocationCallbacks * pAllocator)2661 tu_DestroySampler(VkDevice _device,
2662                   VkSampler _sampler,
2663                   const VkAllocationCallbacks *pAllocator)
2664 {
2665    TU_FROM_HANDLE(tu_device, device, _device);
2666    TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
2667    uint32_t border_color;
2668 
2669    if (!sampler)
2670       return;
2671 
2672    border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >> A6XX_TEX_SAMP_2_BCOLOR__SHIFT;
2673    if (border_color >= TU_BORDER_COLOR_BUILTIN) {
2674       border_color -= TU_BORDER_COLOR_BUILTIN;
2675       /* if the sampler had a custom border color, free it. TODO: no lock */
2676       mtx_lock(&device->mutex);
2677       assert(!BITSET_TEST(device->custom_border_color, border_color));
2678       BITSET_SET(device->custom_border_color, border_color);
2679       mtx_unlock(&device->mutex);
2680    }
2681 
2682    vk_object_free(&device->vk, pAllocator, sampler);
2683 }
2684 
2685 /* vk_icd.h does not declare this function, so we declare it here to
2686  * suppress Wmissing-prototypes.
2687  */
2688 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2689 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2690 
2691 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t * pSupportedVersion)2692 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2693 {
2694    /* For the full details on loader interface versioning, see
2695     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2696     * What follows is a condensed summary, to help you navigate the large and
2697     * confusing official doc.
2698     *
2699     *   - Loader interface v0 is incompatible with later versions. We don't
2700     *     support it.
2701     *
2702     *   - In loader interface v1:
2703     *       - The first ICD entrypoint called by the loader is
2704     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2705     *         entrypoint.
2706     *       - The ICD must statically expose no other Vulkan symbol unless it
2707     * is linked with -Bsymbolic.
2708     *       - Each dispatchable Vulkan handle created by the ICD must be
2709     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
2710     *         ICD must initialize VK_LOADER_DATA.loadMagic to
2711     * ICD_LOADER_MAGIC.
2712     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2713     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
2714     *         such loader-managed surfaces.
2715     *
2716     *    - Loader interface v2 differs from v1 in:
2717     *       - The first ICD entrypoint called by the loader is
2718     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2719     *         statically expose this entrypoint.
2720     *
2721     *    - Loader interface v3 differs from v2 in:
2722     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2723     *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
2724     *          because the loader no longer does so.
2725     *
2726     *    - Loader interface v4 differs from v3 in:
2727     *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
2728     *
2729     *    - Loader interface v5 differs from v4 in:
2730     *        - The ICD must support Vulkan API version 1.1 and must not return
2731     *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
2732     *          Vulkan Loader with interface v4 or smaller is being used and the
2733     *          application provides an API version that is greater than 1.0.
2734     */
2735    *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
2736    return VK_SUCCESS;
2737 }
2738 
2739 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,const VkMemoryGetFdInfoKHR * pGetFdInfo,int * pFd)2740 tu_GetMemoryFdKHR(VkDevice _device,
2741                   const VkMemoryGetFdInfoKHR *pGetFdInfo,
2742                   int *pFd)
2743 {
2744    TU_FROM_HANDLE(tu_device, device, _device);
2745    TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
2746 
2747    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2748 
2749    /* At the moment, we support only the below handle types. */
2750    assert(pGetFdInfo->handleType ==
2751              VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2752           pGetFdInfo->handleType ==
2753              VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2754 
2755    int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
2756    if (prime_fd < 0)
2757       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2758 
2759    *pFd = prime_fd;
2760    return VK_SUCCESS;
2761 }
2762 
2763 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,VkExternalMemoryHandleTypeFlagBits handleType,int fd,VkMemoryFdPropertiesKHR * pMemoryFdProperties)2764 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
2765                             VkExternalMemoryHandleTypeFlagBits handleType,
2766                             int fd,
2767                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2768 {
2769    assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2770    pMemoryFdProperties->memoryTypeBits = 1;
2771    return VK_SUCCESS;
2772 }
2773 
2774 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceExternalFenceProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalFenceInfo * pExternalFenceInfo,VkExternalFenceProperties * pExternalFenceProperties)2775 tu_GetPhysicalDeviceExternalFenceProperties(
2776    VkPhysicalDevice physicalDevice,
2777    const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
2778    VkExternalFenceProperties *pExternalFenceProperties)
2779 {
2780    pExternalFenceProperties->exportFromImportedHandleTypes = 0;
2781    pExternalFenceProperties->compatibleHandleTypes = 0;
2782    pExternalFenceProperties->externalFenceFeatures = 0;
2783 }
2784 
2785 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceGroupPeerMemoryFeatures(VkDevice device,uint32_t heapIndex,uint32_t localDeviceIndex,uint32_t remoteDeviceIndex,VkPeerMemoryFeatureFlags * pPeerMemoryFeatures)2786 tu_GetDeviceGroupPeerMemoryFeatures(
2787    VkDevice device,
2788    uint32_t heapIndex,
2789    uint32_t localDeviceIndex,
2790    uint32_t remoteDeviceIndex,
2791    VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
2792 {
2793    assert(localDeviceIndex == remoteDeviceIndex);
2794 
2795    *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2796                           VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2797                           VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2798                           VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2799 }
2800 
2801 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)2802 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
2803    VkPhysicalDevice                            physicalDevice,
2804    VkSampleCountFlagBits                       samples,
2805    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
2806 {
2807    TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
2808 
2809    if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
2810       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
2811    else
2812       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
2813 }
2814 
2815 VkDeviceAddress
tu_GetBufferDeviceAddress(VkDevice _device,const VkBufferDeviceAddressInfoKHR * pInfo)2816 tu_GetBufferDeviceAddress(VkDevice _device,
2817                           const VkBufferDeviceAddressInfoKHR* pInfo)
2818 {
2819    TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2820 
2821    return buffer->iova;
2822 }
2823 
tu_GetBufferOpaqueCaptureAddress(VkDevice device,const VkBufferDeviceAddressInfoKHR * pInfo)2824 uint64_t tu_GetBufferOpaqueCaptureAddress(
2825     VkDevice                                    device,
2826     const VkBufferDeviceAddressInfoKHR*         pInfo)
2827 {
2828    tu_stub();
2829    return 0;
2830 }
2831 
tu_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,const VkDeviceMemoryOpaqueCaptureAddressInfoKHR * pInfo)2832 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
2833     VkDevice                                    device,
2834     const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo)
2835 {
2836    tu_stub();
2837    return 0;
2838 }
2839