/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/types.h>
#elif !defined(_WIN32)
#include <sys/sysmacros.h>
#endif

#include "util/debug.h"
#include "util/disk_cache.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/debug.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vulkan/vk_icd.h"

#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

/* The number of IBs per submit isn't infinite; it depends on the ring type
 * (i.e. some initial setup is needed for a submit) and the number of IBs
 * (4 DW each). This limit is arbitrary but should be safe for now.  Ideally,
 * we should get this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
                                         uint64_t p);

static struct radv_timeline_point *radv_timeline_add_point_locked(struct radv_device *device,
                                                                  struct radv_timeline *timeline,
                                                                  uint64_t p);

static void radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                                 struct list_head *processing_list);

static void radv_destroy_semaphore_part(struct radv_device *device,
                                        struct radv_semaphore_part *part);

uint64_t
radv_get_current_time(void)
{
   return os_time_get_nano();
}

static uint64_t
radv_get_absolute_timeout(uint64_t timeout)
{
   if (timeout == UINT64_MAX) {
      return timeout;
   } else {
      uint64_t current_time = radv_get_current_time();

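      /* Clamp so that "current_time + timeout" below cannot overflow. */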
      timeout = MIN2(UINT64_MAX - current_time, timeout);

      return current_time + timeout;
   }
}

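/* The cache UUID hashes identifiers of the driver (and, if used, LLVM)
 * binaries together with the GPU family and pointer size, so disk-cache
 * entries are invalidated whenever any of these change.
 */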
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx)
#ifdef LLVM_AVAILABLE
       || !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)
#endif
   )
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
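   /* override_vram_size is specified in MiB, hence the << 20 below. */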
   int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
   if (ov >= 0)
      return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
   return device->rad_info.vram_size;
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   uint64_t total_size = radv_get_adjusted_vram_size(device);
   return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}

enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = device->rad_info.gart_size;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
       * greater than it. To work around this, we compute the total available memory size (GTT +
       * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
       */
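      /* e.g. (hypothetical numbers): a 256 MiB carveout plus a 3 GiB GTT is
       * reported as roughly 2.2 GiB of VRAM and 1.1 GiB of GTT.
       */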
      const uint64_t total_size = gtt_size + visible_vram_size;
      visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
      gtt_size = total_size - visible_vram_size;
      vram_size = 0;
   }

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
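   /* (The check below keeps the heap only if the non-visible portion is at
    * least one ninth of the visible size.) */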
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

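   /* Chips that can map memory as uncached in L2 additionally expose a copy
    * of each host-visible or device-local memory type with the AMD
    * device-coherent and device-uncached property flags set.
    */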
   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if ((mem_type.propertyFlags &
              (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
             mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
                                                   VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }
}

static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives 2-5% better performance with SotTR and
       * ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
         return " (LLVM 9.0.1)";
      }

      return "";
   }

#ifdef LLVM_AVAILABLE
   return " (LLVM " MESA_LLVM_VERSION_STRING ")";
#else
   unreachable("LLVM is not available");
#endif
}

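/* Parse an integer from an environment variable, e.g. RADV_THREAD_TRACE=4096
 * makes radv_get_int_debug_option("RADV_THREAD_TRACE", -1) return 4096; the
 * default is returned when the variable is unset or not a number.
 */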
int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}

static bool
radv_thread_trace_enabled()
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) ||                    \
   defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult
radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = RADV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table radv_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,

#ifdef RADV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = !!(device->instance->perftest_flags & RADV_PERFTEST_RT),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = (device->instance->perftest_flags & RADV_PERFTEST_RT) && !device->use_llvm,
      .KHR_push_descriptor = true,
      .KHR_ray_tracing_pipeline = (device->instance->perftest_flags & RADV_PERFTEST_RT) && !device->use_llvm,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = radv_thread_trace_enabled(),
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_indexing = true,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_host_query_reset = true,
      .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
      .EXT_image_robustness = true,
      .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_multi_draw = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.chip_class < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
      .EXT_shader_atomic_float = true,
#ifdef LLVM_AVAILABLE
      .EXT_shader_atomic_float2 = !device->use_llvm || LLVM_VERSION_MAJOR >= 14,
#else
      .EXT_shader_atomic_float2 = true,
#endif
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !device->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = true,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_info = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not open device '%s'", path);

         return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not get the kernel driver version for device '%s'", path);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to get version %s: %m",
                          path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

         return VK_ERROR_INCOMPATIBLE_DRIVER;
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         radv_logi("Found compatible device '%s'.", path);
   }
#endif

   struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
                                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &radv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   device->instance = instance;

#ifdef _WIN32
   device->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, false);
   } else {
      device->ws = radv_null_winsys_create();
   }
#endif

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

#ifndef _WIN32
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <
                0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
#ifndef LLVM_AVAILABLE
   if (device->use_llvm) {
      fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
                      "enabled at build time.\n");
      abort();
   }
#endif

   snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
            radv_get_compiler_string(device));

#ifdef ENABLE_SHADER_CACHE
   if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* The gpu id is already embedded in the uuid so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, 0);
#endif

   if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX10)
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed =
      device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = device->rad_info.chip_class >= GFX10 &&
                     device->rad_info.family != CHIP_NAVI14 &&
                     !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);

   device->use_ngg_culling =
      device->use_ngg &&
      device->rad_info.max_render_backends > 1 &&
      (device->rad_info.chip_class >= GFX10_3 ||
       (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;

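   /* Wave32 is only available on GFX10+; the cswave32/pswave32/gewave32
    * RADV_PERFTEST flags opt individual stages into it.
    */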
   if (device->rad_info.chip_class >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;
   }

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);

   radv_get_nir_options(device);

#ifndef _WIN32
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      device->available_nodes = drm_device->available_nodes;
      device->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_disk_cache;
      }
      device->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_disk_cache;
      }
      device->render_devid = render_stat.st_rdev;
   }
#endif

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_disk_cache;
   }

   *device_out = device;

   return VK_SUCCESS;

fail_disk_cache:
   disk_cache_destroy(device->disk_cache);
#ifdef ENABLE_SHADER_CACHE
fail_wsi:
#endif
   device->ws->destroy(device->ws);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"img", RADV_DEBUG_IMG},
   {"noumr", RADV_DEBUG_NO_UMR},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
   {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
   {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
   {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
   {"nonggc", RADV_DEBUG_NO_NGGC},
   {"prologs", RADV_DEBUG_DUMP_PROLOGS},
   {NULL, 0}};

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS},
                                                             {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
                                                             {"bolist", RADV_PERFTEST_BO_LIST},
                                                             {"cswave32", RADV_PERFTEST_CS_WAVE_32},
                                                             {"pswave32", RADV_PERFTEST_PS_WAVE_32},
                                                             {"gewave32", RADV_PERFTEST_GE_WAVE_32},
                                                             {"nosam", RADV_PERFTEST_NO_SAM},
                                                             {"sam", RADV_PERFTEST_SAM},
                                                             {"rt", RADV_PERFTEST_RT},
                                                             {"nggc", RADV_PERFTEST_NGGC},
                                                             {"force_emulate_rt", RADV_PERFTEST_FORCE_EMULATE_RT},
                                                             {NULL, 0}};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}

// clang-format off
static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_RADV_ZERO_VRAM(false)
      DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
      DRI_CONF_RADV_INVARIANT_GEOM(false)
      DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
      DRI_CONF_RADV_DISABLE_DCC(false)
      DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
      DRI_CONF_RADV_DISABLE_HTILE_LAYERS(false)
   DRI_CONF_SECTION_END
};
// clang-format on

static void
radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL, NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");

   instance->disable_shrink_image_store =
      driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");

   instance->absolute_depth_bias =
      driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");

   instance->disable_tc_compat_htile_in_general =
      driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;

   if (driQueryOptionb(&instance->dri_options, "radv_zero_vram"))
      instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;

   if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))
      instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;

   if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
      instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;

   if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
      instance->debug_flags |= RADV_DEBUG_NO_DCC;

   instance->report_apu_as_dgpu =
      driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");

   instance->disable_htile_layers =
      driQueryOptionb(&instance->dri_options, "radv_disable_htile_layers");
}

VkResult
radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false);
   result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(instance, result);
   }

   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
   instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);
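   /* e.g. RADV_DEBUG=info,startup sets RADV_DEBUG_INFO | RADV_DEBUG_STARTUP. */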

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Created an instance");

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void
radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      radv_physical_device_destroy(pdevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   VkResult result = VK_SUCCESS;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that allows testing the compiler without an AMDGPU
       * instance.
       */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }

#ifndef _WIN32
   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Found %d drm nodes", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i], &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);
#endif

   /* If we successfully enumerated any devices, call it success */
   return result;
}

VkResult
radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
                              VkPhysicalDevice *pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i)
      {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VkResult
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
                                   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

void
radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures){
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice),
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = true;
   f->shaderSharedInt64Atomics = true;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = true;
   f->bufferDeviceAddressMultiDevice = true;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

void
radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceFeatures2 *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

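/* Copy a feature bit from the core 1.x feature structs filled above into the
 * extension struct currently being processed. */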
1288 #define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = !pdevice->use_ngg_streamout;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
         VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
            (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
         features->shaderDemoteToHelperInvocation = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

         features->inlineUniformBlock = true;
         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
            (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
         features->subgroupSizeControl = true;
         features->computeFullSubgroups = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
         VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
            (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
         features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = false;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
          * but work on Raven.
          */
         features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
         VkDeviceMemoryOverallocationCreateInfoAMD *features =
            (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
         features->overallocationBehavior = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features =
            (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
         VkPhysicalDevicePrivateDataFeaturesEXT *features =
            (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
         features->privateData = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
         VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
            (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
         features->pipelineCreationCacheControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
         VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
            (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
         features->robustImageAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
            (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
         features->shaderBufferFloat32Atomics = true;
         features->shaderBufferFloat32AtomicAdd = false;
         features->shaderBufferFloat64Atomics = true;
         features->shaderBufferFloat64AtomicAdd = false;
         features->shaderSharedFloat32Atomics = true;
         features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8;
         features->shaderSharedFloat64Atomics = true;
         features->shaderSharedFloat64AtomicAdd = false;
         features->shaderImageFloat32Atomics = true;
         features->shaderImageFloat32AtomicAdd = false;
         features->sparseImageFloat32Atomics = true;
         features->sparseImageFloat32AtomicAdd = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
         VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
            (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
         features->shaderTerminateInvocation = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
         VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
            (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
         features->shaderImageInt64Atomics = true;
         features->sparseImageInt64Atomics = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
         features->mutableDescriptorType = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
         features->pipelineFragmentShadingRate = true;
         features->primitiveFragmentShadingRate = true;
         features->attachmentFragmentShadingRate = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
         VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
            (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
         features->workgroupMemoryExplicitLayout = true;
         features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
         features->workgroupMemoryExplicitLayout8BitAccess = true;
         features->workgroupMemoryExplicitLayout16BitAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
         VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
            (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
         features->shaderZeroInitializeWorkgroupMemory = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
         VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
            (VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
         features->globalPriorityQuery = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
         VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =
            (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;
         features->accelerationStructure = true;
         features->accelerationStructureCaptureReplay = false;
         features->accelerationStructureIndirectBuild = false;
         features->accelerationStructureHostCommands = true;
         features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
         VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
            (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
         features->shaderSubgroupUniformControlFlow = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
         VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
         features->multiDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features =
            (VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *)ext;
         bool has_shader_buffer_float_minmax = ((pdevice->rad_info.chip_class == GFX6 ||
                                                 pdevice->rad_info.chip_class == GFX7) &&
                                                !pdevice->use_llvm) ||
                                               pdevice->rad_info.chip_class >= GFX10;
         bool has_shader_image_float_minmax = pdevice->rad_info.chip_class != GFX8 &&
                                              pdevice->rad_info.chip_class != GFX9;
         features->shaderBufferFloat16Atomics = false;
         features->shaderBufferFloat16AtomicAdd = false;
         features->shaderBufferFloat16AtomicMinMax = false;
         features->shaderBufferFloat32AtomicMinMax = has_shader_buffer_float_minmax;
         features->shaderBufferFloat64AtomicMinMax = has_shader_buffer_float_minmax;
         features->shaderSharedFloat16Atomics = false;
         features->shaderSharedFloat16AtomicAdd = false;
         features->shaderSharedFloat16AtomicMinMax = false;
         features->shaderSharedFloat32AtomicMinMax = true;
         features->shaderSharedFloat64AtomicMinMax = true;
         features->shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax;
         features->sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR: {
         VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *features =
            (VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *)ext;
         features->shaderIntegerDotProduct = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
         VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features =
            (VkPhysicalDeviceRayTracingPipelineFeaturesKHR *)ext;
         features->rayTracingPipeline = true;
         features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
         features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
         features->rayTracingPipelineTraceRaysIndirect = false;
         features->rayTraversalPrimitiveCulling = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR: {
         VkPhysicalDeviceMaintenance4FeaturesKHR *features =
            (VkPhysicalDeviceMaintenance4FeaturesKHR *)ext;
         features->maintenance4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *)ext;
         features->vertexInputDynamicState = true;
         break;
      }
      default:
         break;
      }
   }
}

static size_t
radv_max_descriptor_set_size()
{
   /* Make sure that the entire descriptor set is addressable with a signed
    * 32-bit int. So the sum of all limits scaled by descriptor size has to
    * be at most 2 GiB. A combined image & sampler descriptor counts as one
    * of each. This limit is for the pipeline layout, not for the set
    * layout, but there is no set limit, so we just set a pipeline limit. I
    * don't think any app is going to hit this soon. */
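   /* Illustrative arithmetic: the divisor below sums to
    * 32 + 32 + 32 + 64 + 64 = 224 bytes per resource, so the returned
    * limit is on the order of 2^31 / 224, i.e. roughly 9.5 million
    * descriptors per stage (slightly less after the reserved terms).
    */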
   return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
           MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
           32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
           64 /* storage image */);
}

static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
{
   uint32_t uniform_offset_alignment =
      driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
   if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
      fprintf(stderr,
              "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
              "not a power of two\n",
              uniform_offset_alignment);
      uniform_offset_alignment = 0;
   }

   /* Take at least the hardware limit. */
   return MAX2(uniform_offset_alignment, 4);
}
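
/* Illustrative sketch (assumed drirc syntax, not part of the driver): the
 * override queried above can be set per-application in ~/.drirc, e.g.:
 *
 *    <driconf>
 *       <device driver="radv">
 *          <application name="Foo" executable="foo">
 *             <option name="radv_override_uniform_offset_alignment" value="256" />
 *          </application>
 *       </device>
 *    </driconf>
 */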

void
radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
                                 VkPhysicalDeviceProperties *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   VkSampleCountFlags sample_counts = 0xf;

   size_t max_descriptor_set_size = radv_max_descriptor_set_size();

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = UINT32_MAX,
      .maxUniformBufferRange = UINT32_MAX,
      .maxStorageBufferRange = UINT32_MAX,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = max_descriptor_set_size,
      .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = UINT32_MAX,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 127,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 128,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = 8,
      .maxComputeSharedMemorySize = pdevice->rad_info.chip_class >= GFX7 ? 65536 : 32768,
      .maxComputeWorkGroupCount = {65535, 65535, 65535},
      .maxComputeWorkGroupInvocations = 1024,
      .maxComputeWorkGroupSize = {1024, 1024, 1024},
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = {(1 << 14), (1 << 14)},
      .viewportBoundsRange = {INT16_MIN, INT16_MAX},
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 4,
      .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
      .minStorageBufferOffsetAlignment = 4,
      .minTexelOffset = -32,
      .maxTexelOffset = 31,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -2,
      .maxInterpolationOffset = 2,
      .subPixelInterpolationOffsetBits = 8,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = sample_counts,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
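      /* Assumption noted for illustration: clock_crystal_freq is reported
       * by the kernel in kHz, so 1000000.0 / freq yields the timestamp
       * period in nanoseconds (e.g. a 100000 kHz counter gives 10 ns).
       */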
      .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = {0.0, 8191.875},
      .lineWidthRange = {0.0, 8191.875},
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 8.0),
      .strictLines = false, /* FINISHME */
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 1,
      .optimalBufferCopyRowPitchAlignment = 1,
      .nonCoherentAtomSize = 64,
   };

   VkPhysicalDeviceType device_type;

   if (pdevice->rad_info.has_dedicated_vram || pdevice->instance->report_apu_as_dgpu) {
      device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
   } else {
      device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
   }

   *pProperties = (VkPhysicalDeviceProperties){
      .apiVersion = RADV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = ATI_VENDOR_ID,
      .deviceID = pdevice->rad_info.pci_id,
      .deviceType = device_type,
      .limits = limits,
      .sparseProperties =
         {
            .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
            .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
         },
   };

   strcpy(pProperties->deviceName, pdevice->name);
   memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

static void
radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   /* The LUID is for Windows. */
   p->deviceLUIDValid = false;
   p->deviceNodeMask = 0;

   p->subgroupSize = RADV_SUBGROUP_SIZE;
   p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations =
      VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
      VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
      VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
      VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
   p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}

static void
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_RADV;
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
            radv_get_compiler_string(pdevice));
   p->conformanceVersion = (VkConformanceVersion){
      .major = 1,
      .minor = 2,
      .subminor = 3,
      .patch = 0,
   };

   /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
    * controlled by the same config register.
    */
   if (pdevice->rad_info.has_packed_math_16bit) {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
   } else {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
   }

   /* With LLVM, do not allow both preserving and flushing denorms because
    * different shaders in the same pipeline can have different settings and
    * this won't work for merged shaders. To make it work, this requires LLVM
    * support for changing the register. The same logic applies for the
    * rounding modes because they are configured with the same config
    * register.
    */
   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat16 =
      pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;

   p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
   p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
   p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;

   p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = false;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;

   size_t max_descriptor_set_size = radv_max_descriptor_set_size();
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                   VK_RESOLVE_MODE_AVERAGE_BIT_KHR | VK_RESOLVE_MODE_MIN_BIT_KHR |
                                   VK_RESOLVE_MODE_MAX_BIT_KHR;

   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                     VK_RESOLVE_MODE_MIN_BIT_KHR | VK_RESOLVE_MODE_MAX_BIT_KHR;

   p->independentResolveNone = true;
   p->independentResolve = true;

   /* GFX6-8 only support single channel min/max filter. */
   p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
   p->filterMinmaxSingleComponentFormats = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
}

void
radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceProperties2 *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   radv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   radv_get_physical_device_properties_1_2(pdevice, &core_1_2);

   vk_foreach_struct(ext, pProperties->pNext)
   {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
            (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
            (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
         properties->minImportedHostPointerAlignment = 4096;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
         VkPhysicalDeviceShaderCorePropertiesAMD *properties =
            (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

         /* Shader engines. */
         properties->shaderEngineCount = pdevice->rad_info.max_se;
         properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
         properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
         properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
         properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
         properties->wavefrontSize = 64;

         /* SGPR. */
         properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
         properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
         properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
         properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;

         /* VGPR. */
         properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
         properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
         properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
         properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
         VkPhysicalDeviceShaderCoreProperties2AMD *properties =
            (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

         properties->shaderCoreFeatures = 0;
         properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         properties->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
            (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
         properties->primitiveOverestimationSize = 0;
         properties->maxExtraPrimitiveOverestimationSize = 0;
         properties->extraPrimitiveOverestimationSizeGranularity = 0;
         properties->primitiveUnderestimation = false;
         properties->conservativePointAndLineRasterization = false;
         properties->degenerateTrianglesRasterized = true;
         properties->degenerateLinesRasterized = false;
         properties->fullyCoveredFragmentShaderInputVariable = false;
         properties->conservativeRasterizationPostDepthCoverage = false;
         break;
      }
#ifndef _WIN32
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
         VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
            (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
         properties->pciDomain = pdevice->bus_info.domain;
         properties->pciBus = pdevice->bus_info.bus;
         properties->pciDevice = pdevice->bus_info.dev;
         properties->pciFunction = pdevice->bus_info.func;
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
         properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
         properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
         properties->transformFeedbackRasterizationStreamSelect = false;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

         props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
         props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
         props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
            MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
         props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
         props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
                                                  VK_SAMPLE_COUNT_8_BIT;
         properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
            (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
         properties->storageTexelBufferOffsetAlignmentBytes = 4;
         properties->storageTexelBufferOffsetSingleTexelAlignment = true;
         properties->uniformTexelBufferOffsetAlignmentBytes = 4;
         properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
            (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
         props->minSubgroupSize = 64;
         props->maxSubgroupSize = 64;
         props->maxComputeWorkgroupSubgroups = UINT32_MAX;
         props->requiredSubgroupSizeStages = 0;

         if (pdevice->rad_info.chip_class >= GFX10) {
            /* Only GFX10+ supports wave32. */
            props->minSubgroupSize = 32;
            props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
         }
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *properties =
            (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
         properties->robustStorageBufferAccessSizeAlignment = 4;
         properties->robustUniformBufferAccessSizeAlignment = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
         VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
            (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
         props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
         props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
         props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
         props->primitiveFragmentShadingRateWithMultipleViewports = true;
         props->layeredShadingRateAttachments = false; /* TODO */
         props->fragmentShadingRateNonTrivialCombinerOps = true;
         props->maxFragmentSize = (VkExtent2D){2, 2};
         props->maxFragmentSizeAspectRatio = 2;
         props->maxFragmentShadingRateCoverageSamples = 32;
         props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
         props->fragmentShadingRateWithShaderDepthStencilWrites = false;
         props->fragmentShadingRateWithSampleMask = true;
         props->fragmentShadingRateWithShaderSampleMask = false;
         props->fragmentShadingRateWithConservativeRasterization = true;
         props->fragmentShadingRateWithFragmentShaderInterlock = false;
         props->fragmentShadingRateWithCustomSampleLocations = false;
         props->fragmentShadingRateStrictMultiplyCombiner = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         props->provokingVertexModePerPipeline = true;
         props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
         VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =
            (VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;
         props->maxGeometryCount = (1 << 24) - 1;
         props->maxInstanceCount = (1 << 24) - 1;
         props->maxPrimitiveCount = (1 << 29) - 1;
         props->maxPerStageDescriptorAccelerationStructures =
            pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
         props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
            pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
         props->maxDescriptorSetAccelerationStructures =
            pProperties->properties.limits.maxDescriptorSetStorageBuffers;
         props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
            pProperties->properties.limits.maxDescriptorSetStorageBuffers;
         props->minAccelerationStructureScratchOffsetAlignment = 128;
         break;
      }
#ifndef _WIN32
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;
         if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
            props->hasPrimary = true;
            props->primaryMajor = (int64_t)major(pdevice->primary_devid);
            props->primaryMinor = (int64_t)minor(pdevice->primary_devid);
         } else {
            props->hasPrimary = false;
         }
         if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
            props->hasRender = true;
            props->renderMajor = (int64_t)major(pdevice->render_devid);
            props->renderMinor = (int64_t)minor(pdevice->render_devid);
         } else {
            props->hasRender = false;
         }
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
         VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
         props->maxMultiDrawCount = 2048;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR: {
         VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *props =
            (VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *)ext;

         bool accel = pdevice->rad_info.has_accelerated_dot_product;

         props->integerDotProduct8BitUnsignedAccelerated = accel;
         props->integerDotProduct8BitSignedAccelerated = accel;
         props->integerDotProduct8BitMixedSignednessAccelerated = false;
         props->integerDotProduct4x8BitPackedUnsignedAccelerated = accel;
         props->integerDotProduct4x8BitPackedSignedAccelerated = accel;
         props->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
         props->integerDotProduct16BitUnsignedAccelerated = accel;
         props->integerDotProduct16BitSignedAccelerated = accel;
         props->integerDotProduct16BitMixedSignednessAccelerated = false;
         props->integerDotProduct32BitUnsignedAccelerated = false;
         props->integerDotProduct32BitSignedAccelerated = false;
         props->integerDotProduct32BitMixedSignednessAccelerated = false;
         props->integerDotProduct64BitUnsignedAccelerated = false;
         props->integerDotProduct64BitSignedAccelerated = false;
         props->integerDotProduct64BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel;
         props->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel;
         props->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel;
         props->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel;
         props->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
            false;
         props->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel;
         props->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel;
         props->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
         VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props =
            (VkPhysicalDeviceRayTracingPipelinePropertiesKHR *)ext;
         props->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
         props->maxRayRecursionDepth = 31;    /* Minimum allowed for DXR. */
         props->maxShaderGroupStride = 16384; /* dummy */
         props->shaderGroupBaseAlignment = 16;
         props->shaderGroupHandleCaptureReplaySize = 16;
         props->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
         props->shaderGroupHandleAlignment = 16;
         props->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: {
         VkPhysicalDeviceMaintenance4PropertiesKHR *properties =
            (VkPhysicalDeviceMaintenance4PropertiesKHR *)ext;
         properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
         break;
      }
      default:
         break;
      }
   }
}
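
/* Illustrative usage sketch (application side, not driver code): extension
 * property structs are chained through pNext and filled by the loop above,
 * e.g.:
 *
 *    VkPhysicalDeviceDrmPropertiesEXT drm_props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &drm_props,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 */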

static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
                                                 uint32_t *pCount,
                                                 VkQueueFamilyProperties **pQueueFamilyProperties)
{
   int num_queue_families = 1;
   int idx;
   if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   idx = 0;
   if (*pCount >= 1) {
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
                       VK_QUEUE_SPARSE_BINDING_BIT,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags =
               VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }
   *pCount = idx;
}
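
/* Illustrative usage sketch (application side): the NULL check above
 * implements the usual two-call enumeration pattern, e.g.:
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, NULL);
 *    VkQueueFamilyProperties *families = malloc(count * sizeof(*families));
 *    vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, families);
 */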

void
radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                            VkQueueFamilyProperties *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      pQueueFamilyProperties + 0,
      pQueueFamilyProperties + 1,
      pQueueFamilyProperties + 2,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);
}

static const VkQueueGlobalPriorityEXT radv_global_queue_priorities[] = {
   VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
   VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
};

void
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                             VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      &pQueueFamilyProperties[0].queueFamilyProperties,
      &pQueueFamilyProperties[1].queueFamilyProperties,
      &pQueueFamilyProperties[2].queueFamilyProperties,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);

   for (uint32_t i = 0; i < *pCount; i++) {
      vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
      {
         switch (ext->sType) {
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
            VkQueueFamilyGlobalPriorityPropertiesEXT *prop =
               (VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
            STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_EXT);
            prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
            memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
            break;
         }
         default:
            break;
         }
      }
   }
}
2393 
2394 void
2395 radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
2396                                        VkPhysicalDeviceMemoryProperties *pMemoryProperties)
2397 {
2398    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2399 
2400    *pMemoryProperties = physical_device->memory_properties;
2401 }
2402 
2403 static void
2404 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2405                                   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2406 {
2407    RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2408    VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
2409 
2410    /* For all memory heaps, the budget is computed as follows:
2411     *    heap_budget = heap_size - global_heap_usage + app_heap_usage
2412     *
2413     * The Vulkan spec 1.1.97 says that the budget should include any
2414     * currently allocated device memory.
2415     *
2416     * Note that the application heap usages are not really accurate (e.g.
2417     * in the presence of shared buffers).
2418     */
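   /* Worked example with illustrative numbers: heap_size = 8 GiB,
    * global_heap_usage = 3 GiB and app_heap_usage = 1 GiB give
    * heap_budget = 8 - 3 + 1 = 6 GiB, i.e. this process may keep its
    * current 1 GiB and allocate up to 5 GiB more. */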
2419    if (!device->rad_info.has_dedicated_vram) {
2420       /* On APUs, the driver exposes fake heaps to the application because the carveout is
2421        * usually too small for games; the budgets then need to be redistributed accordingly.
2422        */
2423 
2424       assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
2425       assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
2426       assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2427       uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;
2428 
2429       /* Get the visible VRAM/GTT heap sizes and internal usages. */
2430       uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
2431       uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2432 
2433       uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
2434                                          device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2435       uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2436 
2437       /* Compute the total heap size, internal and system usage. */
2438       uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
2439       uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2440       uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
2441                                     device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2442 
2443       uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2444 
2445       /* Compute the total free space that can be allocated for this process across all heaps. */
2446       uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2447 
2448       /* Compute the remaining visible VRAM size for this process. */
2449       uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
2450 
2451       /* Distribute the total free space (2/3 as VRAM and 1/3 as GTT) to match the heap sizes,
2452        * and align down to the page size to be conservative.
2453        */
2454       vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
2455                                           device->rad_info.gart_page_size);
2456       uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
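      /* Each heap's advertised budget is what this process already allocated
       * there plus the share of free space it may still take, matching the
       * formula above. */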
2457 
2458       memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
2459       memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
2460       memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
2461       memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
2462    } else {
2463       unsigned mask = device->heaps;
2464       unsigned heap = 0;
2465       while (mask) {
2466          uint64_t internal_usage = 0, system_usage = 0;
2467          unsigned type = 1u << u_bit_scan(&mask);
2468 
2469          switch (type) {
2470          case RADV_HEAP_VRAM:
2471             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2472             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
2473             break;
2474          case RADV_HEAP_VRAM_VIS:
2475             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
2476             if (!(device->heaps & RADV_HEAP_VRAM))
2477                internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2478             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
2479             break;
2480          case RADV_HEAP_GTT:
2481             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2482             system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2483             break;
2484          }
2485 
2486          uint64_t total_usage = MAX2(internal_usage, system_usage);
2487 
2488          uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
2489                                MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
2490          memoryBudget->heapBudget[heap] = free_space + internal_usage;
2491          memoryBudget->heapUsage[heap] = internal_usage;
2492          ++heap;
2493       }
2494 
2495       assert(heap == memory_properties->memoryHeapCount);
2496    }
2497 
2498    /* The heapBudget and heapUsage values must be zero for array elements
2499     * greater than or equal to
2500     * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2501     */
2502    for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2503       memoryBudget->heapBudget[i] = 0;
2504       memoryBudget->heapUsage[i] = 0;
2505    }
2506 }
2507 
2508 void
2509 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2510                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2511 {
2512    radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties);
2513 
2514    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2515       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2516    if (memory_budget)
2517       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2518 }
2519 
2520 VkResult
2521 radv_GetMemoryHostPointerPropertiesEXT(
2522    VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
2523    VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
2524 {
2525    RADV_FROM_HANDLE(radv_device, device, _device);
2526 
2527    switch (handleType) {
2528    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2529       const struct radv_physical_device *physical_device = device->physical_device;
2530       uint32_t memoryTypeBits = 0;
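      /* Host allocations can only be imported as cacheable system memory, so
       * only the first GTT memory type without write-combining is reported. */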
2531       for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2532          if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2533              !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2534             memoryTypeBits = (1 << i);
2535             break;
2536          }
2537       }
2538       pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2539       return VK_SUCCESS;
2540    }
2541    default:
2542       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2543    }
2544 }
2545 
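/* Map VK_EXT_global_priority levels onto the kernel context priorities used
 * when a hardware context is created for the queue. */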
2546 static enum radeon_ctx_priority
2547 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2548 {
2549    /* Default to MEDIUM when a specific global priority isn't requested */
2550    if (!pObj)
2551       return RADEON_CTX_PRIORITY_MEDIUM;
2552 
2553    switch (pObj->globalPriority) {
2554    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2555       return RADEON_CTX_PRIORITY_REALTIME;
2556    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2557       return RADEON_CTX_PRIORITY_HIGH;
2558    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2559       return RADEON_CTX_PRIORITY_MEDIUM;
2560    case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2561       return RADEON_CTX_PRIORITY_LOW;
2562    default:
2563       unreachable("Illegal global priority value");
2564       return RADEON_CTX_PRIORITY_INVALID;
2565    }
2566 }
2567 
2568 static int
2569 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
2570                 int idx, const VkDeviceQueueCreateInfo *create_info,
2571                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
2572 {
2573    queue->device = device;
2574    queue->priority = radv_get_queue_global_priority(global_priority);
2575    queue->hw_ctx = device->hw_ctx[queue->priority];
2576 
2577    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
2578    if (result != VK_SUCCESS)
2579       return result;
2580 
2581    list_inithead(&queue->pending_submissions);
2582    mtx_init(&queue->pending_mutex, mtx_plain);
2583 
2584    mtx_init(&queue->thread_mutex, mtx_plain);
2585    if (u_cnd_monotonic_init(&queue->thread_cond)) {
2586       vk_queue_finish(&queue->vk);
2587       return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2588    }
2589    queue->cond_created = true;
2590 
2591    return VK_SUCCESS;
2592 }
2593 
2594 static void
2595 radv_queue_finish(struct radv_queue *queue)
2596 {
2597    if (queue->hw_ctx) {
2598       if (queue->cond_created) {
2599          if (queue->thread_running) {
2600             p_atomic_set(&queue->thread_exit, true);
2601             u_cnd_monotonic_broadcast(&queue->thread_cond);
2602             thrd_join(queue->submission_thread, NULL);
2603          }
2604 
2605          u_cnd_monotonic_destroy(&queue->thread_cond);
2606       }
2607 
2608       mtx_destroy(&queue->pending_mutex);
2609       mtx_destroy(&queue->thread_mutex);
2610    }
2611 
2612    if (queue->initial_full_flush_preamble_cs)
2613       queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2614    if (queue->initial_preamble_cs)
2615       queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2616    if (queue->continue_preamble_cs)
2617       queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2618    if (queue->descriptor_bo)
2619       queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
2620    if (queue->scratch_bo)
2621       queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
2622    if (queue->esgs_ring_bo)
2623       queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
2624    if (queue->gsvs_ring_bo)
2625       queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
2626    if (queue->tess_rings_bo)
2627       queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
2628    if (queue->gds_bo)
2629       queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
2630    if (queue->gds_oa_bo)
2631       queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
2632    if (queue->compute_scratch_bo)
2633       queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
2634 
2635    vk_queue_finish(&queue->vk);
2636 }
2637 
2638 static void
2639 radv_device_init_gs_info(struct radv_device *device)
2640 {
2641    device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
2642                                                   device->physical_device->rad_info.family);
2643 }
2644 
2645 static VkResult
2646 radv_device_init_border_color(struct radv_device *device)
2647 {
2648    VkResult result;
2649 
2650    result = device->ws->buffer_create(
2651       device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
2652       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
2653       RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
2654 
2655    if (result != VK_SUCCESS)
2656       return vk_error(device, result);
2657 
2658    result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
2659    if (result != VK_SUCCESS)
2660       return vk_error(device, result);
2661 
2662    device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
2663    if (!device->border_color_data.colors_gpu_ptr)
2664       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2665    mtx_init(&device->border_color_data.mutex, mtx_plain);
2666 
2667    return VK_SUCCESS;
2668 }
2669 
2670 static void
2671 radv_device_finish_border_color(struct radv_device *device)
2672 {
2673    if (device->border_color_data.bo) {
2674       device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
2675       device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
2676 
2677       mtx_destroy(&device->border_color_data.mutex);
2678    }
2679 }
2680 
2681 static VkResult
2682 radv_device_init_vs_prologs(struct radv_device *device)
2683 {
2684    u_rwlock_init(&device->vs_prologs_lock);
2685    device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog);
2686    if (!device->vs_prologs)
2687       return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2688 
2689    /* don't pre-compile prologs if we want to print them */
2690    if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
2691       return VK_SUCCESS;
2692 
2693    struct radv_vs_input_state state;
2694    state.nontrivial_divisors = 0;
2695    memset(state.offsets, 0, sizeof(state.offsets));
2696    state.alpha_adjust_lo = 0;
2697    state.alpha_adjust_hi = 0;
2698    memset(state.formats, 0, sizeof(state.formats));
2699 
2700    struct radv_vs_prolog_key key;
2701    key.state = &state;
2702    key.misaligned_mask = 0;
2703    key.as_ls = false;
2704    key.is_ngg = device->physical_device->use_ngg;
2705    key.next_stage = MESA_SHADER_VERTEX;
2706    key.wave32 = device->physical_device->ge_wave_size == 32;
2707 
2708    for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
2709       state.attribute_mask = BITFIELD_MASK(i);
2710       state.instance_rate_inputs = 0;
2711 
2712       key.num_attributes = i;
2713 
2714       device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
2715       if (!device->simple_vs_prologs[i - 1])
2716          return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2717    }
2718 
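   /* Pre-compile a prolog for every contiguous range of instance-rate inputs.
    * radv_instance_rate_prolog_index() is expected to map each
    * (num_attributes, start, count) combination to a dense array index, which
    * the assert below double-checks. */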
2719    unsigned idx = 0;
2720    for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
2721       state.attribute_mask = BITFIELD_MASK(num_attributes);
2722 
2723       for (unsigned i = 0; i < num_attributes; i++)
2724          state.divisors[i] = 1;
2725 
2726       for (unsigned count = 1; count <= num_attributes; count++) {
2727          for (unsigned start = 0; start <= (num_attributes - count); start++) {
2728             state.instance_rate_inputs = u_bit_consecutive(start, count);
2729 
2730             key.num_attributes = num_attributes;
2731 
2732             struct radv_shader_prolog *prolog = radv_create_vs_prolog(device, &key);
2733             if (!prolog)
2734                return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2735 
2736             assert(idx ==
2737                    radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs));
2738             device->instance_rate_vs_prologs[idx++] = prolog;
2739          }
2740       }
2741    }
2742    assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs));
2743 
2744    return VK_SUCCESS;
2745 }
2746 
2747 static void
2748 radv_device_finish_vs_prologs(struct radv_device *device)
2749 {
2750    if (device->vs_prologs) {
2751       hash_table_foreach(device->vs_prologs, entry)
2752       {
2753          free((void *)entry->key);
2754          radv_prolog_destroy(device, entry->data);
2755       }
2756       _mesa_hash_table_destroy(device->vs_prologs, NULL);
2757    }
2758 
2759    for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++)
2760       radv_prolog_destroy(device, device->simple_vs_prologs[i]);
2761 
2762    for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++)
2763       radv_prolog_destroy(device, device->instance_rate_vs_prologs[i]);
2764 }
2765 
2766 VkResult
2767 radv_device_init_vrs_state(struct radv_device *device)
2768 {
2769    /* FIXME: A 4096x4096 depth buffer should be large enough for now, but we might want to
2770     * adjust this dynamically at some point.
2771     */
2772    uint32_t width = 4096, height = 4096;
2773    VkDeviceMemory mem;
2774    VkBuffer buffer;
2775    VkResult result;
2776    VkImage image;
2777 
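   /* Allocate a device-wide 4096x4096 D16 image whose metadata (HTILE) holds
    * the forced VRS rates; the buffer created below backs that metadata,
    * which is why it is sized with surface.meta_size. */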
2778    VkImageCreateInfo image_create_info = {
2779       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2780       .imageType = VK_IMAGE_TYPE_2D,
2781       .format = VK_FORMAT_D16_UNORM,
2782       .extent = {width, height, 1},
2783       .mipLevels = 1,
2784       .arrayLayers = 1,
2785       .samples = VK_SAMPLE_COUNT_1_BIT,
2786       .tiling = VK_IMAGE_TILING_OPTIMAL,
2787       .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
2788       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2789       .queueFamilyIndexCount = 0,
2790       .pQueueFamilyIndices = NULL,
2791       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2792    };
2793 
2794    result = radv_CreateImage(radv_device_to_handle(device), &image_create_info,
2795                              &device->meta_state.alloc, &image);
2796    if (result != VK_SUCCESS)
2797       return result;
2798 
2799    VkBufferCreateInfo buffer_create_info = {
2800       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2801       .size = radv_image_from_handle(image)->planes[0].surface.meta_size,
2802       .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
2803       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2804    };
2805 
2806    result = radv_CreateBuffer(radv_device_to_handle(device), &buffer_create_info,
2807                               &device->meta_state.alloc, &buffer);
2808    if (result != VK_SUCCESS)
2809       goto fail_create;
2810 
2811    VkBufferMemoryRequirementsInfo2 info = {
2812       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
2813       .buffer = buffer,
2814    };
2815    VkMemoryRequirements2 mem_req = {
2816       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2817    };
2818    radv_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
2819 
2820    VkMemoryAllocateInfo alloc_info = {
2821       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2822       .allocationSize = mem_req.memoryRequirements.size,
2823    };
2824 
2825    result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info,
2826                                 &device->meta_state.alloc, &mem);
2827    if (result != VK_SUCCESS)
2828       goto fail_alloc;
2829 
2830    VkBindBufferMemoryInfo bind_info = {
2831       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
2832       .buffer = buffer,
2833       .memory = mem,
2834       .memoryOffset = 0
2835    };
2836 
2837    result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
2838    if (result != VK_SUCCESS)
2839       goto fail_bind;
2840 
2841    device->vrs.image = radv_image_from_handle(image);
2842    device->vrs.buffer = radv_buffer_from_handle(buffer);
2843    device->vrs.mem = radv_device_memory_from_handle(mem);
2844 
2845    return VK_SUCCESS;
2846 
2847 fail_bind:
2848    radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
2849 fail_alloc:
2850    radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
2851 fail_create:
2852    radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
2853 
2854    return result;
2855 }
2856 
2857 static void
2858 radv_device_finish_vrs_image(struct radv_device *device)
2859 {
2860    radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
2861                    &device->meta_state.alloc);
2862    radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
2863                      &device->meta_state.alloc);
2864    radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
2865                      &device->meta_state.alloc);
2866 }
2867 
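/* Flag the logical device as lost and report VK_ERROR_DEVICE_LOST with the
 * caller's context; later submissions see the lost counter and fail early. */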
2868 VkResult
2869 _radv_device_set_lost(struct radv_device *device, const char *file, int line, const char *msg, ...)
2870 {
2871    VkResult err;
2872    va_list ap;
2873 
2874    p_atomic_inc(&device->lost);
2875 
2876    va_start(ap, msg);
2877    err =
2878       __vk_errorv(device, VK_ERROR_DEVICE_LOST, file, line, msg, ap);
2879    va_end(ap);
2880 
2881    return err;
2882 }
2883 
2884 VkResult
2885 radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
2886                   const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
2887 {
2888    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2889    VkResult result;
2890    struct radv_device *device;
2891 
2892    bool keep_shader_info = false;
2893    bool robust_buffer_access = false;
2894    bool robust_buffer_access2 = false;
2895    bool overallocation_disallowed = false;
2896    bool custom_border_colors = false;
2897    bool attachment_vrs_enabled = false;
2898    bool image_float32_atomics = false;
2899    bool vs_prologs = false;
2900 
2901    /* Check enabled features */
2902    if (pCreateInfo->pEnabledFeatures) {
2903       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
2904          robust_buffer_access = true;
2905    }
2906 
2907    vk_foreach_struct_const(ext, pCreateInfo->pNext)
2908    {
2909       switch (ext->sType) {
2910       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
2911          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
2912          if (features->features.robustBufferAccess)
2913             robust_buffer_access = true;
2914          break;
2915       }
2916       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
2917          const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
2918          if (overallocation->overallocationBehavior ==
2919              VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
2920             overallocation_disallowed = true;
2921          break;
2922       }
2923       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2924          const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
2925             (const void *)ext;
2926          custom_border_colors = border_color_features->customBorderColors;
2927          break;
2928       }
2929       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
2930          const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
2931          attachment_vrs_enabled = vrs->attachmentFragmentShadingRate;
2932          break;
2933       }
2934       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
2935          const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
2936          if (features->robustBufferAccess2)
2937             robust_buffer_access2 = true;
2938          break;
2939       }
2940       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
2941          const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext;
2942          if (features->shaderImageFloat32Atomics ||
2943              features->sparseImageFloat32Atomics)
2944             image_float32_atomics = true;
2945          break;
2946       }
2947       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
2948          const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext;
2949          if (features->shaderImageFloat32AtomicMinMax ||
2950              features->sparseImageFloat32AtomicMinMax)
2951             image_float32_atomics = true;
2952          break;
2953       }
2954       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
2955          const VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features = (const void *)ext;
2956          if (features->vertexInputDynamicState)
2957             vs_prologs = true;
2958          break;
2959       }
2960       default:
2961          break;
2962       }
2963    }
2964 
2965    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2966                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2967    if (!device)
2968       return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2969 
2970    struct vk_device_dispatch_table dispatch_table;
2971 
2972    if (physical_device->instance->vk.app_info.app_name &&
2973        !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) {
2974       /* Metro Exodus (Linux native) sometimes calls vkGetSemaphoreCounterValue() with a NULL
2975        * semaphore and crashes. Work around this game bug by enabling an internal layer. Remove
2976        * this when the game is fixed.
2977        */
2978       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true);
2979       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
2980    } else if (radv_thread_trace_enabled()) {
2981       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
2982       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
2983    } else {
2984       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
2985    }
2986    vk_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_device_entrypoints, false);
2987 
2988    result =
2989       vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
2990    if (result != VK_SUCCESS) {
2991       vk_free(&device->vk.alloc, device);
2992       return result;
2993    }
2994 
2995    device->instance = physical_device->instance;
2996    device->physical_device = physical_device;
2997 
2998    device->ws = physical_device->ws;
2999 
3000    keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;
3001 
3002    /* With update-after-bind we can't attach BOs to the command buffer
3003     * from the descriptor set anymore, so we have to use a global BO list.
3004     */
3005    device->use_global_bo_list = (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
3006                                 device->vk.enabled_extensions.EXT_descriptor_indexing ||
3007                                 device->vk.enabled_extensions.EXT_buffer_device_address ||
3008                                 device->vk.enabled_extensions.KHR_buffer_device_address ||
3009                                 device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
3010                                 device->vk.enabled_extensions.KHR_acceleration_structure;
3011 
3012    device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
3013    device->robust_buffer_access2 = robust_buffer_access2;
3014 
3015    device->attachment_vrs_enabled = attachment_vrs_enabled;
3016 
3017    device->image_float32_atomics = image_float32_atomics;
3018 
3019    radv_init_shader_arenas(device);
3020 
3021    device->overallocation_disallowed = overallocation_disallowed;
3022    mtx_init(&device->overallocation_mutex, mtx_plain);
3023 
3024    /* Create one context per queue priority. */
3025    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3026       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3027       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3028          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3029       enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
3030 
3031       if (device->hw_ctx[priority])
3032          continue;
3033 
3034       result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
3035       if (result != VK_SUCCESS)
3036          goto fail;
3037    }
3038 
3039    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3040       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3041       uint32_t qfi = queue_create->queueFamilyIndex;
3042       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3043          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3044 
3045       device->queues[qfi] =
3046          vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
3047                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3048       if (!device->queues[qfi]) {
3049          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3050          goto fail;
3051       }
3052 
3053       memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
3054 
3055       device->queue_count[qfi] = queue_create->queueCount;
3056 
3057       for (unsigned q = 0; q < queue_create->queueCount; q++) {
3058          result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
3059          if (result != VK_SUCCESS)
3060             goto fail;
3061       }
3062    }
3063 
3064    device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
3065                          !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
3066 
3067    /* The maximum number of scratch waves. Scratch space isn't divided
3068     * evenly between CUs. The number is only a function of the number of CUs.
3069     * We can decrease the constant to decrease the scratch buffer size.
3070     *
3071     * device->scratch_waves must be >= the maximum possible size of
3072     * 1 threadgroup, so that the hw doesn't hang from being unable
3073     * to start any.
3074     *
3075     * The recommended value is 4 per CU at most. Higher numbers don't
3076     * bring much benefit, but they still occupy chip resources (think
3077     * async compute). I've seen ~2% performance difference between 4 and 32.
3078     */
3079    uint32_t max_threads_per_block = 2048;
3080    device->scratch_waves =
3081       MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
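   /* e.g. with 40 compute units this allows 32 * 40 = 1280 scratch waves,
    * well above the 2048 / 64 = 32 waves that one maximally sized
    * threadgroup requires (illustrative numbers). */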
3082 
3083    device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
3084 
3085    if (device->physical_device->rad_info.chip_class >= GFX7) {
3086       /* If the KMD allows it (there is a KMD hw register for it),
3087        * allow launching waves out-of-order.
3088        */
3089       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
3090    }
3091 
3092    radv_device_init_gs_info(device);
3093 
3094    device->tess_offchip_block_dw_size =
3095       device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
3096 
3097    if (getenv("RADV_TRACE_FILE")) {
3098       fprintf(
3099          stderr,
3100          "***********************************************************************************\n");
3101       fprintf(
3102          stderr,
3103          "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
3104       fprintf(
3105          stderr,
3106          "***********************************************************************************\n");
3107       abort();
3108    }
3109 
3110    if (device->instance->debug_flags & RADV_DEBUG_HANG) {
3111       /* Enable GPU hangs detection and dump logs if a GPU hang is
3112        * detected.
3113        */
3114       keep_shader_info = true;
3115 
3116       if (!radv_init_trace(device))
3117          goto fail;
3118 
3119       fprintf(stderr,
3120               "*****************************************************************************\n");
3121       fprintf(stderr,
3122               "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
3123       fprintf(stderr,
3124               "*****************************************************************************\n");
3125 
3126       /* Wait for idle after every draw/dispatch to identify the
3127        * first bad call.
3128        */
3129       device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
3130 
3131       radv_dump_enabled_options(device, stderr);
3132    }
3133 
3134    if (radv_thread_trace_enabled()) {
3135       fprintf(stderr, "*************************************************\n");
3136       fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
3137       fprintf(stderr, "*************************************************\n");
3138 
3139       if (device->physical_device->rad_info.chip_class < GFX8 ||
3140           device->physical_device->rad_info.chip_class > GFX10_3) {
3141          fprintf(stderr, "GPU hardware not supported: refer to "
3142                          "the RGP documentation for the list of "
3143                          "supported GPUs!\n");
3144          abort();
3145       }
3146 
3147       if (!radv_thread_trace_init(device))
3148          goto fail;
3149    }
3150 
3151    if (getenv("RADV_TRAP_HANDLER")) {
3152       /* TODO: Add support for more hardware. */
3153       assert(device->physical_device->rad_info.chip_class == GFX8);
3154 
3155       fprintf(stderr, "**********************************************************************\n");
3156       fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
3157       fprintf(stderr, "**********************************************************************\n");
3158 
3159       /* To get the disassembly of the faulty shaders, we have to
3160        * keep some shader info around.
3161        */
3162       keep_shader_info = true;
3163 
3164       if (!radv_trap_handler_init(device))
3165          goto fail;
3166    }
3167 
3168    if (getenv("RADV_FORCE_VRS")) {
3169       const char *vrs_rates = getenv("RADV_FORCE_VRS");
3170 
3171       if (device->physical_device->rad_info.chip_class < GFX10_3)
3172          fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
3173       else if (!strcmp(vrs_rates, "2x2"))
3174          device->force_vrs = RADV_FORCE_VRS_2x2;
3175       else if (!strcmp(vrs_rates, "2x1"))
3176          device->force_vrs = RADV_FORCE_VRS_2x1;
3177       else if (!strcmp(vrs_rates, "1x2"))
3178          device->force_vrs = RADV_FORCE_VRS_1x2;
3179       else
3180          fprintf(stderr, "radv: Invalid VRS rates specified "
3181                          "(valid values are 2x2, 2x1 and 1x2)\n");
3182    }
3183 
3184    device->adjust_frag_coord_z =
3185       (device->vk.enabled_extensions.KHR_fragment_shading_rate ||
3186        device->force_vrs != RADV_FORCE_VRS_NONE) &&
3187       (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
3188        device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
3189        device->physical_device->rad_info.family == CHIP_VANGOGH);
3190 
3191    device->keep_shader_info = keep_shader_info;
3192    result = radv_device_init_meta(device);
3193    if (result != VK_SUCCESS)
3194       goto fail;
3195 
3196    radv_device_init_msaa(device);
3197 
3198    /* If the border color extension is enabled, let's create the buffer we need. */
3199    if (custom_border_colors) {
3200       result = radv_device_init_border_color(device);
3201       if (result != VK_SUCCESS)
3202          goto fail;
3203    }
3204 
3205    if (vs_prologs) {
3206       result = radv_device_init_vs_prologs(device);
3207       if (result != VK_SUCCESS)
3208          goto fail;
3209    }
3210 
3211    for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
3212       device->empty_cs[family] = device->ws->cs_create(device->ws, family);
3213       if (!device->empty_cs[family])
3214          goto fail;
3215 
3216       switch (family) {
3217       case RADV_QUEUE_GENERAL:
3218          radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
3219          radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
3220          radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
3221          break;
3222       case RADV_QUEUE_COMPUTE:
3223          radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
3224          radeon_emit(device->empty_cs[family], 0);
3225          break;
3226       }
3227 
3228       result = device->ws->cs_finalize(device->empty_cs[family]);
3229       if (result != VK_SUCCESS)
3230          goto fail;
3231    }
3232 
3233    if (device->physical_device->rad_info.chip_class >= GFX7)
3234       cik_create_gfx_config(device);
3235 
3236    VkPipelineCacheCreateInfo ci;
3237    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
3238    ci.pNext = NULL;
3239    ci.flags = 0;
3240    ci.pInitialData = NULL;
3241    ci.initialDataSize = 0;
3242    VkPipelineCache pc;
3243    result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
3244    if (result != VK_SUCCESS)
3245       goto fail_meta;
3246 
3247    device->mem_cache = radv_pipeline_cache_from_handle(pc);
3248 
3249    if (u_cnd_monotonic_init(&device->timeline_cond)) {
3250       result = VK_ERROR_INITIALIZATION_FAILED;
3251       goto fail_mem_cache;
3252    }
3253 
3254    device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
3255    if (device->force_aniso >= 0) {
3256       fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
3257               1 << util_logbase2(device->force_aniso));
3258    }
3259 
3260    *pDevice = radv_device_to_handle(device);
3261    return VK_SUCCESS;
3262 
3263 fail_mem_cache:
3264    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3265 fail_meta:
3266    radv_device_finish_meta(device);
3267 fail:
3268    radv_thread_trace_finish(device);
3269    free(device->thread_trace.trigger_file);
3270 
3271    radv_trap_handler_finish(device);
3272    radv_finish_trace(device);
3273 
3274    if (device->gfx_init)
3275       device->ws->buffer_destroy(device->ws, device->gfx_init);
3276 
3277    radv_device_finish_vs_prologs(device);
3278    radv_device_finish_border_color(device);
3279 
3280    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3281       for (unsigned q = 0; q < device->queue_count[i]; q++)
3282          radv_queue_finish(&device->queues[i][q]);
3283       if (device->queue_count[i])
3284          vk_free(&device->vk.alloc, device->queues[i]);
3285    }
3286 
3287    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3288       if (device->hw_ctx[i])
3289          device->ws->ctx_destroy(device->hw_ctx[i]);
3290    }
3291 
3292    vk_device_finish(&device->vk);
3293    vk_free(&device->vk.alloc, device);
3294    return result;
3295 }
3296 
3297 void
3298 radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
3299 {
3300    RADV_FROM_HANDLE(radv_device, device, _device);
3301 
3302    if (!device)
3303       return;
3304 
3305    if (device->gfx_init)
3306       device->ws->buffer_destroy(device->ws, device->gfx_init);
3307 
3308    radv_device_finish_vs_prologs(device);
3309    radv_device_finish_border_color(device);
3310    radv_device_finish_vrs_image(device);
3311 
3312    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3313       for (unsigned q = 0; q < device->queue_count[i]; q++)
3314          radv_queue_finish(&device->queues[i][q]);
3315       if (device->queue_count[i])
3316          vk_free(&device->vk.alloc, device->queues[i]);
3317       if (device->empty_cs[i])
3318          device->ws->cs_destroy(device->empty_cs[i]);
3319    }
3320 
3321    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3322       if (device->hw_ctx[i])
3323          device->ws->ctx_destroy(device->hw_ctx[i]);
3324    }
3325 
3326    radv_device_finish_meta(device);
3327 
3328    VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
3329    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3330 
3331    radv_trap_handler_finish(device);
3332    radv_finish_trace(device);
3333 
3334    radv_destroy_shader_arenas(device);
3335 
3336    u_cnd_monotonic_destroy(&device->timeline_cond);
3337 
3338    free(device->thread_trace.trigger_file);
3339    radv_thread_trace_finish(device);
3340 
3341    vk_device_finish(&device->vk);
3342    vk_free(&device->vk.alloc, device);
3343 }
3344 
3345 VkResult
3346 radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
3347 {
3348    if (pProperties == NULL) {
3349       *pPropertyCount = 0;
3350       return VK_SUCCESS;
3351    }
3352 
3353    /* None supported at this time */
3354    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3355 }
3356 
3357 VkResult
3358 radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
3359                                     VkLayerProperties *pProperties)
3360 {
3361    if (pProperties == NULL) {
3362       *pPropertyCount = 0;
3363       return VK_SUCCESS;
3364    }
3365 
3366    /* None supported at this time */
3367    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3368 }
3369 
3370 static void
3371 fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_positions,
3372                      uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
3373                      uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
3374                      uint32_t tess_factor_ring_size, uint32_t tess_offchip_ring_offset,
3375                      uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo)
3376 {
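   /* The first four dwords are left for the scratch buffer descriptor that
    * the caller writes; the ring descriptors built here start at map[4]. */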
3377    uint32_t *desc = &map[4];
3378 
3379    if (esgs_ring_bo) {
3380       uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3381 
3382       /* stride 0, num records - size, add tid, swizzle, elsize4,
3383          index stride 64 */
3384       desc[0] = esgs_va;
3385       desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | S_008F04_SWIZZLE_ENABLE(true);
3386       desc[2] = esgs_ring_size;
3387       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3388                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3389                 S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
3390 
3391       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3392          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3393                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3394       } else {
3395          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3396                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3397       }
3398 
3399       /* GS entry for ES->GS ring */
3400       /* stride 0, num records - size, elsize0,
3401          index stride 0 */
3402       desc[4] = esgs_va;
3403       desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3404       desc[6] = esgs_ring_size;
3405       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3406                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3407 
3408       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3409          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3410                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3411       } else {
3412          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3413                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3414       }
3415    }
3416 
3417    desc += 8;
3418 
3419    if (gsvs_ring_bo) {
3420       uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3421 
3422       /* VS entry for GS->VS ring */
3423       /* stride 0, num records - size, elsize0,
3424          index stride 0 */
3425       desc[0] = gsvs_va;
3426       desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3427       desc[2] = gsvs_ring_size;
3428       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3429                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3430 
3431       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3432          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3433                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3434       } else {
3435          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3436                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3437       }
3438 
3439       /* stride gsvs_itemsize, num records 64
3440          elsize 4, index stride 16 */
3441       /* shader will patch stride and desc[2] */
3442       desc[4] = gsvs_va;
3443       desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
3444       desc[6] = 0;
3445       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3446                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3447                 S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
3448 
3449       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3450          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3451                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3452       } else {
3453          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3454                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3455       }
3456    }
3457 
3458    desc += 8;
3459 
3460    if (tess_rings_bo) {
3461       uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3462       uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
3463 
3464       desc[0] = tess_va;
3465       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3466       desc[2] = tess_factor_ring_size;
3467       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3468                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3469 
3470       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3471          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3472                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3473       } else {
3474          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3475                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3476       }
3477 
3478       desc[4] = tess_offchip_va;
3479       desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3480       desc[6] = tess_offchip_ring_size;
3481       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3482                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3483 
3484       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3485          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3486                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3487       } else {
3488          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3489                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3490       }
3491    }
3492 
3493    desc += 8;
3494 
3495    if (add_sample_positions) {
3496       /* add sample positions after all rings */
3497       memcpy(desc, queue->device->sample_locations_1x, 8);
3498       desc += 2;
3499       memcpy(desc, queue->device->sample_locations_2x, 16);
3500       desc += 4;
3501       memcpy(desc, queue->device->sample_locations_4x, 32);
3502       desc += 8;
3503       memcpy(desc, queue->device->sample_locations_8x, 64);
3504    }
3505 }
3506 
3507 static unsigned
3508 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
3509 {
3510    bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
3511                                  device->physical_device->rad_info.family != CHIP_CARRIZO &&
3512                                  device->physical_device->rad_info.family != CHIP_STONEY;
3513    unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
3514    unsigned max_offchip_buffers;
3515    unsigned offchip_granularity;
3516    unsigned hs_offchip_param;
3517 
3518    /*
3519     * Per RadeonSI:
3520     * This must be one less than the maximum number due to a hw limitation.
3521     * Various hardware bugs need this.
3522     *
3523     * Per AMDVLK:
3524     * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
3525     * Gfx7 should limit max_offchip_buffers to 508
3526     * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
3527     *
3528     * Follow AMDVLK here.
3529     */
3530    if (device->physical_device->rad_info.chip_class >= GFX10) {
3531       max_offchip_buffers_per_se = 128;
3532    } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
3533               device->physical_device->rad_info.chip_class == GFX7 ||
3534               device->physical_device->rad_info.chip_class == GFX6)
3535       --max_offchip_buffers_per_se;
3536 
3537    max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;
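   /* e.g. 4 shader engines at 128 buffers per SE yield 512 buffers before
    * the per-generation clamps below (illustrative numbers). */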
3538 
3539    /* Hawaii has a bug with offchip buffers > 256 that can be worked
3540     * around by setting 4K granularity.
3541     */
3542    if (device->tess_offchip_block_dw_size == 4096) {
3543       assert(device->physical_device->rad_info.family == CHIP_HAWAII);
3544       offchip_granularity = V_03093C_X_4K_DWORDS;
3545    } else {
3546       assert(device->tess_offchip_block_dw_size == 8192);
3547       offchip_granularity = V_03093C_X_8K_DWORDS;
3548    }
3549 
3550    switch (device->physical_device->rad_info.chip_class) {
3551    case GFX6:
3552       max_offchip_buffers = MIN2(max_offchip_buffers, 126);
3553       break;
3554    case GFX7:
3555    case GFX8:
3556    case GFX9:
3557       max_offchip_buffers = MIN2(max_offchip_buffers, 508);
3558       break;
3559    case GFX10:
3560       break;
3561    default:
3562       break;
3563    }
3564 
3565    *max_offchip_buffers_p = max_offchip_buffers;
3566    if (device->physical_device->rad_info.chip_class >= GFX10_3) {
3567       hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
3568                          S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
3569    } else if (device->physical_device->rad_info.chip_class >= GFX7) {
3570       if (device->physical_device->rad_info.chip_class >= GFX8)
3571          --max_offchip_buffers;
3572       hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
3573                          S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
3574    } else {
3575       hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
3576    }
3577    return hs_offchip_param;
3578 }
3579 
3580 static void
3581 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3582                         struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
3583                         struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
3584 {
3585    if (!esgs_ring_bo && !gsvs_ring_bo)
3586       return;
3587 
3588    if (esgs_ring_bo)
3589       radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
3590 
3591    if (gsvs_ring_bo)
3592       radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
3593 
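   /* Ring sizes are programmed in 256-byte units (hence the >> 8). GFX7+
    * uses the privileged uconfig copies of these registers; older ASICs use
    * the config registers. */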
3594    if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3595       radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3596       radeon_emit(cs, esgs_ring_size >> 8);
3597       radeon_emit(cs, gsvs_ring_size >> 8);
3598    } else {
3599       radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3600       radeon_emit(cs, esgs_ring_size >> 8);
3601       radeon_emit(cs, gsvs_ring_size >> 8);
3602    }
3603 }
3604 
3605 static void
3606 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3607                            unsigned hs_offchip_param, unsigned tf_ring_size,
3608                            struct radeon_winsys_bo *tess_rings_bo)
3609 {
3610    uint64_t tf_va;
3611 
3612    if (!tess_rings_bo)
3613       return;
3614 
3615    tf_va = radv_buffer_get_va(tess_rings_bo);
3616 
3617    radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
3618 
3619    if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3620       radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4));
3621       radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
3622 
3623       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3624          radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
3625                                 S_030984_BASE_HI(tf_va >> 40));
3626       } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3627          radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
3628       }
3629       radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
3630    } else {
3631       radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size / 4));
3632       radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
3633       radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
3634    }
3635 }
3636 
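/* Program the graphics scratch (spilling) state. Only the general queue owns
 * a gfx ring, so this is a no-op for compute/transfer queues. The WAVESIZE
 * field appears to be in 1024-byte granules, which is what
 * round_up_u32(size_per_wave, 1024) computes.
 */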
static void
radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                           uint32_t size_per_wave, uint32_t waves,
                           struct radeon_winsys_bo *scratch_bo)
{
   if (queue->vk.queue_family_index != RADV_QUEUE_GENERAL)
      return;

   if (!scratch_bo)
      return;

   radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

   radeon_set_context_reg(
      cs, R_0286E8_SPI_TMPRING_SIZE,
      S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}

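/* Program the compute scratch state: the swizzled ring base address goes
 * into the first two COMPUTE_USER_DATA SGPRs, and the waves/wavesize limits
 * into COMPUTE_TMPRING_SIZE.
 */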
static void
radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                          uint32_t size_per_wave, uint32_t waves,
                          struct radeon_winsys_bo *compute_scratch_bo)
{
   uint64_t scratch_va;

   if (!compute_scratch_bo)
      return;

   scratch_va = radv_buffer_get_va(compute_scratch_bo);

   radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);

   radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
   radeon_emit(cs, scratch_va);
   radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1));

   radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                     S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}

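/* Broadcast the ring-descriptor BO address to the user-data SGPRs of every
 * hardware shader stage. The register lists differ per generation because
 * GFX9+ merged LS/HS and ES/GS into single hardware stages.
 */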
static void
radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                                 struct radeon_winsys_bo *descriptor_bo)
{
   uint64_t va;

   if (!descriptor_bo)
      return;

   va = radv_buffer_get_va(descriptor_bo);

   radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);

   if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
      uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
                         R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
                         R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};

      for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
         radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
      }
   } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
      uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
                         R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
                         R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};

      for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
         radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
      }
   } else {
      uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
                         R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
                         R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};

      for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
         radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
      }
   }
}

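/* Emit the one-time graphics register initialization, either by chaining to
 * the pre-built gfx_init IB when one exists or by emitting the full sequence
 * inline.
 */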
static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
   struct radv_device *device = queue->device;

   if (device->gfx_init) {
      uint64_t va = radv_buffer_get_va(device->gfx_init);

      radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
      radeon_emit(cs, device->gfx_init_size_dw & 0xffff);

      radv_cs_add_buffer(device->ws, cs, device->gfx_init);
   } else {
      si_emit_graphics(device, cs);
   }
}

static void
radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
   si_emit_compute(queue->device, cs);
}

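/* Build (or reuse) the three per-queue preamble command streams:
 *  - initial_full_flush_preamble_cs: initial state plus a full cache flush,
 *  - initial_preamble_cs: initial state plus a lighter, invalidate-only flush,
 *  - continue_preamble_cs: initial state only, for chained submissions.
 * Scratch/ring resources only ever grow; if the current allocations are
 * already large enough, the cached preambles are returned unchanged.
 */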
static VkResult
radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
                     uint32_t scratch_waves, uint32_t compute_scratch_size_per_wave,
                     uint32_t compute_scratch_waves, uint32_t esgs_ring_size,
                     uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds,
                     bool needs_gds_oa, bool needs_sample_positions,
                     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                     struct radeon_cmdbuf **initial_preamble_cs,
                     struct radeon_cmdbuf **continue_preamble_cs)
{
   struct radeon_winsys_bo *scratch_bo = NULL;
   struct radeon_winsys_bo *descriptor_bo = NULL;
   struct radeon_winsys_bo *compute_scratch_bo = NULL;
   struct radeon_winsys_bo *esgs_ring_bo = NULL;
   struct radeon_winsys_bo *gsvs_ring_bo = NULL;
   struct radeon_winsys_bo *tess_rings_bo = NULL;
   struct radeon_winsys_bo *gds_bo = NULL;
   struct radeon_winsys_bo *gds_oa_bo = NULL;
   struct radeon_cmdbuf *dest_cs[3] = {0};
   bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
   unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
   unsigned max_offchip_buffers;
   unsigned hs_offchip_param = 0;
   unsigned tess_offchip_ring_offset;
   uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
   VkResult result = VK_SUCCESS;
   if (!queue->has_tess_rings) {
      if (needs_tess_rings)
         add_tess_rings = true;
   }
   if (!queue->has_gds) {
      if (needs_gds)
         add_gds = true;
   }
   if (!queue->has_gds_oa) {
      if (needs_gds_oa)
         add_gds_oa = true;
   }
   if (!queue->has_sample_positions) {
      if (needs_sample_positions)
         add_sample_positions = true;
   }
   tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
   hs_offchip_param = radv_get_hs_offchip_param(queue->device, &max_offchip_buffers);
   tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
   tess_offchip_ring_size = max_offchip_buffers * queue->device->tess_offchip_block_dw_size * 4;

   scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
   if (scratch_size_per_wave)
      scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
   else
      scratch_waves = 0;

   compute_scratch_size_per_wave =
      MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
   if (compute_scratch_size_per_wave)
      compute_scratch_waves =
         MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
   else
      compute_scratch_waves = 0;

   if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
       scratch_waves <= queue->scratch_waves &&
       compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
       compute_scratch_waves <= queue->compute_scratch_waves &&
       esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size &&
       !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
       queue->initial_preamble_cs) {
      *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
      *initial_preamble_cs = queue->initial_preamble_cs;
      *continue_preamble_cs = queue->continue_preamble_cs;
      if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&
          !gsvs_ring_size && !needs_tess_rings && !needs_gds && !needs_gds_oa &&
          !needs_sample_positions)
         *continue_preamble_cs = NULL;
      return VK_SUCCESS;
   }

   uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
   uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
   if (scratch_size > queue_scratch_size) {
      result =
         queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else
      scratch_bo = queue->scratch_bo;

   uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
   uint32_t compute_queue_scratch_size =
      queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
   if (compute_scratch_size > compute_queue_scratch_size) {
      result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096,
                                                RADEON_DOMAIN_VRAM, ring_bo_flags,
                                                RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
      if (result != VK_SUCCESS)
         goto fail;

   } else
      compute_scratch_bo = queue->compute_scratch_bo;

   if (esgs_ring_size > queue->esgs_ring_size) {
      result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
                                                RADEON_DOMAIN_VRAM, ring_bo_flags,
                                                RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      esgs_ring_bo = queue->esgs_ring_bo;
      esgs_ring_size = queue->esgs_ring_size;
   }

   if (gsvs_ring_size > queue->gsvs_ring_size) {
      result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
                                                RADEON_DOMAIN_VRAM, ring_bo_flags,
                                                RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      gsvs_ring_bo = queue->gsvs_ring_bo;
      gsvs_ring_size = queue->gsvs_ring_size;
   }

   if (add_tess_rings) {
      result = queue->device->ws->buffer_create(
         queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
         RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      tess_rings_bo = queue->tess_rings_bo;
   }

   if (add_gds) {
      assert(queue->device->physical_device->rad_info.chip_class >= GFX10);

      /* 4 streamout GDS counters.
       * We need 256B (64 dw) of GDS, otherwise streamout hangs.
       */
      result =
         queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      gds_bo = queue->gds_bo;
   }

   if (add_gds_oa) {
      assert(queue->device->physical_device->rad_info.chip_class >= GFX10);

      result =
         queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
                                          RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      gds_oa_bo = queue->gds_oa_bo;
   }

   if (scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
       gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
       add_sample_positions) {
      uint32_t size = 0;
      if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {
         size = 112; /* scratch descriptor (2 dwords + 2 padding) + 6 ring descriptors of 4 dwords = 28 dwords */
         if (add_sample_positions)
            size += 128; /* 64+32+16+8 = 120 bytes of sample positions, padded to 128 */
      } else if (scratch_bo)
         size = 8; /* scratch descriptor: 2 dwords */

      result = queue->device->ws->buffer_create(
         queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
         RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
         RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else
      descriptor_bo = queue->descriptor_bo;

   if (descriptor_bo != queue->descriptor_bo) {
      uint32_t *map = (uint32_t *)queue->device->ws->buffer_map(descriptor_bo);
      if (!map) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      if (scratch_bo) {
         uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
         uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
         map[0] = scratch_va;
         map[1] = rsrc1;
      }

      if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
         fill_geom_tess_rings(queue, map, add_sample_positions, esgs_ring_size, esgs_ring_bo,
                              gsvs_ring_size, gsvs_ring_bo, tess_factor_ring_size,
                              tess_offchip_ring_offset, tess_offchip_ring_size, tess_rings_bo);

      queue->device->ws->buffer_unmap(descriptor_bo);
   }

   for (int i = 0; i < 3; ++i) {
      enum rgp_flush_bits sqtt_flush_bits = 0;
      struct radeon_cmdbuf *cs = NULL;
      cs = queue->device->ws->cs_create(queue->device->ws,
                                        queue->vk.queue_family_index ? RING_COMPUTE : RING_GFX);
      if (!cs) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      dest_cs[i] = cs;

      if (scratch_bo)
         radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

      /* Emit initial configuration. */
      switch (queue->vk.queue_family_index) {
      case RADV_QUEUE_GENERAL:
         radv_init_graphics_state(cs, queue);
         break;
      case RADV_QUEUE_COMPUTE:
         radv_init_compute_state(cs, queue);
         break;
      case RADV_QUEUE_TRANSFER:
         break;
      }

      if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
         radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));

         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
         radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
      }

      radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, gsvs_ring_bo,
                              gsvs_ring_size);
      radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, tess_factor_ring_size, tess_rings_bo);
      radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
      radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave, compute_scratch_waves,
                                compute_scratch_bo);
      radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo);

      if (gds_bo)
         radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
      if (gds_oa_bo)
         radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);

      if (i == 0) {
         si_cs_emit_cache_flush(
            cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
            queue->vk.queue_family_index == RADV_QUEUE_COMPUTE &&
               queue->device->physical_device->rad_info.chip_class >= GFX7,
            (queue->vk.queue_family_index == RADV_QUEUE_COMPUTE
                ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
                : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
               RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
               RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS,
            &sqtt_flush_bits, 0);
      } else if (i == 1) {
         si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
                                queue->vk.queue_family_index == RADV_QUEUE_COMPUTE &&
                                   queue->device->physical_device->rad_info.chip_class >= GFX7,
                                RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
                                   RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
                                   RADV_CMD_FLAG_START_PIPELINE_STATS,
                                &sqtt_flush_bits, 0);
      }

      result = queue->device->ws->cs_finalize(cs);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (queue->initial_full_flush_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);

   if (queue->initial_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_preamble_cs);

   if (queue->continue_preamble_cs)
      queue->device->ws->cs_destroy(queue->continue_preamble_cs);

   queue->initial_full_flush_preamble_cs = dest_cs[0];
   queue->initial_preamble_cs = dest_cs[1];
   queue->continue_preamble_cs = dest_cs[2];

   if (scratch_bo != queue->scratch_bo) {
      if (queue->scratch_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
      queue->scratch_bo = scratch_bo;
   }
   queue->scratch_size_per_wave = scratch_size_per_wave;
   queue->scratch_waves = scratch_waves;

   if (compute_scratch_bo != queue->compute_scratch_bo) {
      if (queue->compute_scratch_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
      queue->compute_scratch_bo = compute_scratch_bo;
   }
   queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
   queue->compute_scratch_waves = compute_scratch_waves;

   if (esgs_ring_bo != queue->esgs_ring_bo) {
      if (queue->esgs_ring_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
      queue->esgs_ring_bo = esgs_ring_bo;
      queue->esgs_ring_size = esgs_ring_size;
   }

   if (gsvs_ring_bo != queue->gsvs_ring_bo) {
      if (queue->gsvs_ring_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
      queue->gsvs_ring_bo = gsvs_ring_bo;
      queue->gsvs_ring_size = gsvs_ring_size;
   }

   if (tess_rings_bo != queue->tess_rings_bo) {
      queue->tess_rings_bo = tess_rings_bo;
      queue->has_tess_rings = true;
   }

   if (gds_bo != queue->gds_bo) {
      queue->gds_bo = gds_bo;
      queue->has_gds = true;
   }

   if (gds_oa_bo != queue->gds_oa_bo) {
      queue->gds_oa_bo = gds_oa_bo;
      queue->has_gds_oa = true;
   }

   if (descriptor_bo != queue->descriptor_bo) {
      if (queue->descriptor_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);

      queue->descriptor_bo = descriptor_bo;
   }

   if (add_sample_positions)
      queue->has_sample_positions = true;

   *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
   *initial_preamble_cs = queue->initial_preamble_cs;
   *continue_preamble_cs = queue->continue_preamble_cs;
   if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
      *continue_preamble_cs = NULL;
   return VK_SUCCESS;
fail:
   for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
      if (dest_cs[i])
         queue->device->ws->cs_destroy(dest_cs[i]);
   if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
   if (scratch_bo && scratch_bo != queue->scratch_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
   if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
   if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
   if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
   if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
   if (gds_bo && gds_bo != queue->gds_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
   if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);

   return vk_error(queue, result);
}

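/* Count the syncobjs behind the given semaphore parts (and the optional
 * fence), allocate the winsys arrays and fill them with the handles to wait
 * on or signal.
 */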
static VkResult
radv_alloc_sem_counts(struct radv_device *device, struct radv_winsys_sem_counts *counts,
                      int num_sems, struct radv_semaphore_part **sems,
                      const uint64_t *timeline_values, VkFence _fence, bool is_signal)
{
   int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;

   if (num_sems == 0 && _fence == VK_NULL_HANDLE)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < num_sems; i++) {
      switch (sems[i]->kind) {
      case RADV_SEMAPHORE_SYNCOBJ:
         counts->syncobj_count++;
         counts->syncobj_reset_count++;
         break;
      case RADV_SEMAPHORE_NONE:
         break;
      case RADV_SEMAPHORE_TIMELINE:
         counts->syncobj_count++;
         break;
      case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
         counts->timeline_syncobj_count++;
         break;
      }
   }

   if (_fence != VK_NULL_HANDLE)
      counts->syncobj_count++;

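   /* Single allocation: the u64 timeline points come first, followed by the
    * u32 syncobj handles (binary syncobjs first, then timeline-point
    * syncobjs), which is the layout the pointer arithmetic below assumes.
    */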
   if (counts->syncobj_count || counts->timeline_syncobj_count) {
      counts->points = (uint64_t *)malloc(sizeof(*counts->syncobj) * counts->syncobj_count +
                                          (sizeof(*counts->syncobj) + sizeof(*counts->points)) *
                                             counts->timeline_syncobj_count);
      if (!counts->points)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      counts->syncobj = (uint32_t *)(counts->points + counts->timeline_syncobj_count);
   }

   non_reset_idx = counts->syncobj_reset_count;

   for (uint32_t i = 0; i < num_sems; i++) {
      switch (sems[i]->kind) {
      case RADV_SEMAPHORE_NONE:
         unreachable("Empty semaphore");
         break;
      case RADV_SEMAPHORE_SYNCOBJ:
         counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
         break;
      case RADV_SEMAPHORE_TIMELINE: {
         mtx_lock(&sems[i]->timeline.mutex);
         struct radv_timeline_point *point = NULL;
         if (is_signal) {
            point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
         } else {
            point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline,
                                                             timeline_values[i]);
         }

         mtx_unlock(&sems[i]->timeline.mutex);

         if (point) {
            counts->syncobj[non_reset_idx++] = point->syncobj;
         } else {
            /* Explicitly clear the semaphore so we don't go looking for a
             * point for it again post-submit. */
            sems[i] = NULL;
         }
         break;
      }
      case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
         counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
         counts->points[timeline_idx] = timeline_values[i];
         ++timeline_idx;
         break;
      }
   }

   if (_fence != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_fence, fence, _fence);

      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
      counts->syncobj[non_reset_idx++] = part->syncobj;
   }

   assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
   counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);

   return VK_SUCCESS;
}

static void
radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
{
   free(sem_info->wait.points);
   free(sem_info->signal.points);
}

static void
radv_free_temp_syncobjs(struct radv_device *device, int num_sems, struct radv_semaphore_part *sems)
{
   for (uint32_t i = 0; i < num_sems; i++) {
      radv_destroy_semaphore_part(device, sems + i);
   }
}

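/* Fill in both halves of the winsys semaphore info. The fence, if any, is
 * attached to the signal side only.
 */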
static VkResult
radv_alloc_sem_info(struct radv_device *device, struct radv_winsys_sem_info *sem_info,
                    int num_wait_sems, struct radv_semaphore_part **wait_sems,
                    const uint64_t *wait_values, int num_signal_sems,
                    struct radv_semaphore_part **signal_sems, const uint64_t *signal_values,
                    VkFence fence)
{
   VkResult ret;

   ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values,
                               VK_NULL_HANDLE, false);
   if (ret)
      return ret;
   ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems,
                               signal_values, fence, true);
   if (ret)
      radv_free_sem_info(sem_info);

   /* caller can override these */
   sem_info->cs_emit_wait = true;
   sem_info->cs_emit_signal = true;
   return ret;
}

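/* Post-submit bookkeeping for timeline semaphores: drop the wait-count
 * references taken at submit time, bump highest_submitted/max_point, and
 * wake any submissions that were blocked on these timelines.
 */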
static void
radv_finalize_timelines(struct radv_device *device, uint32_t num_wait_sems,
                        struct radv_semaphore_part **wait_sems, const uint64_t *wait_values,
                        uint32_t num_signal_sems, struct radv_semaphore_part **signal_sems,
                        const uint64_t *signal_values, struct list_head *processing_list)
{
   for (uint32_t i = 0; i < num_wait_sems; ++i) {
      if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         mtx_lock(&wait_sems[i]->timeline.mutex);
         struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
            device, &wait_sems[i]->timeline, wait_values[i]);
         point->wait_count -= 2;
         mtx_unlock(&wait_sems[i]->timeline.mutex);
      }
   }
   for (uint32_t i = 0; i < num_signal_sems; ++i) {
      if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         mtx_lock(&signal_sems[i]->timeline.mutex);
         struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
            device, &signal_sems[i]->timeline, signal_values[i]);
         signal_sems[i]->timeline.highest_submitted =
            MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
         point->wait_count -= 2;
         radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
         mtx_unlock(&signal_sems[i]->timeline.mutex);
      } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
         signal_sems[i]->timeline_syncobj.max_point =
            MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
      }
   }
}

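/* Sparse binding helpers: each pBind entry maps (or unmaps, when memory is
 * VK_NULL_HANDLE) a range of the resource's virtual address space.
 */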
static VkResult
radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
   VkResult result;

   for (uint32_t i = 0; i < bind->bindCount; ++i) {
      struct radv_device_memory *mem = NULL;

      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

      result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
                                               bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
                                               mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static VkResult
radv_sparse_image_opaque_bind_memory(struct radv_device *device,
                                     const VkSparseImageOpaqueMemoryBindInfo *bind)
{
   RADV_FROM_HANDLE(radv_image, image, bind->image);
   VkResult result;

   for (uint32_t i = 0; i < bind->bindCount; ++i) {
      struct radv_device_memory *mem = NULL;

      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

      result = device->ws->buffer_virtual_bind(device->ws, image->bo,
                                               bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
                                               mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

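/* Bind memory to a rectangular region of a sparse image. The offset/pitch of
 * the targeted mip level come from the surface layout; a bind that covers
 * whole rows can be done with a single virtual-bind call, otherwise one call
 * is issued per row of tiles.
 */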
static VkResult
radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
{
   RADV_FROM_HANDLE(radv_image, image, bind->image);
   struct radeon_surf *surface = &image->planes[0].surface;
   uint32_t bs = vk_format_get_blocksize(image->vk_format);
   VkResult result;

   for (uint32_t i = 0; i < bind->bindCount; ++i) {
      struct radv_device_memory *mem = NULL;
      uint32_t offset, pitch;
      uint32_t mem_offset = bind->pBinds[i].memoryOffset;
      const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
      const uint32_t level = bind->pBinds[i].subresource.mipLevel;

      VkExtent3D bind_extent = bind->pBinds[i].extent;
      bind_extent.width =
         DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
      bind_extent.height =
         DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));

      VkOffset3D bind_offset = bind->pBinds[i].offset;
      bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
      bind_offset.y /= vk_format_get_blockheight(image->vk_format);

      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

      if (device->physical_device->rad_info.chip_class >= GFX9) {
         offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
         pitch = surface->u.gfx9.prt_level_pitch[level];
      } else {
         offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                  surface->u.legacy.level[level].slice_size_dw * 4 * layer;
         pitch = surface->u.legacy.level[level].nblk_x;
      }

      offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);

      uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);

      bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;

      if (whole_subres) {
         uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);

         uint32_t size = aligned_extent_width * aligned_extent_height * bs;
         result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
                                                  mem ? mem->bo : NULL, mem_offset);
         if (result != VK_SUCCESS)
            return result;
      } else {
         uint32_t img_increment = pitch * bs;
         uint32_t mem_increment = aligned_extent_width * bs;
         uint32_t size = mem_increment * surface->prt_tile_height;
         for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
            result = device->ws->buffer_virtual_bind(
               device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
               mem_offset + mem_increment * y);
            if (result != VK_SUCCESS)
               return result;
         }
      }
   }

   return VK_SUCCESS;
}

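/* Compute the worst-case scratch/ring requirements over a batch of command
 * buffers and get matching preambles.
 */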
static VkResult
radv_get_preambles(struct radv_queue *queue, const VkCommandBuffer *cmd_buffers,
                   uint32_t cmd_buffer_count, struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                   struct radeon_cmdbuf **initial_preamble_cs,
                   struct radeon_cmdbuf **continue_preamble_cs)
{
   uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
   uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
   uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
   bool tess_rings_needed = false;
   bool gds_needed = false;
   bool gds_oa_needed = false;
   bool sample_positions_needed = false;

   for (uint32_t j = 0; j < cmd_buffer_count; j++) {
      RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffers[j]);

      scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
      waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
      compute_scratch_size_per_wave =
         MAX2(compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);
      compute_waves_wanted = MAX2(compute_waves_wanted, cmd_buffer->compute_scratch_waves_wanted);
      esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
      gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
      tess_rings_needed |= cmd_buffer->tess_rings_needed;
      gds_needed |= cmd_buffer->gds_needed;
      gds_oa_needed |= cmd_buffer->gds_oa_needed;
      sample_positions_needed |= cmd_buffer->sample_positions_needed;
   }

   return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
                               compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size,
                               gsvs_ring_size, tess_rings_needed, gds_needed, gds_oa_needed,
                               sample_positions_needed, initial_full_flush_preamble_cs,
                               initial_preamble_cs, continue_preamble_cs);
}

struct radv_deferred_queue_submission {
   struct radv_queue *queue;
   VkCommandBuffer *cmd_buffers;
   uint32_t cmd_buffer_count;

   /* Sparse bindings that happen on a queue. */
   VkSparseBufferMemoryBindInfo *buffer_binds;
   uint32_t buffer_bind_count;
   VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
   uint32_t image_opaque_bind_count;
   VkSparseImageMemoryBindInfo *image_binds;
   uint32_t image_bind_count;

   bool flush_caches;
   VkPipelineStageFlags wait_dst_stage_mask;
   struct radv_semaphore_part **wait_semaphores;
   uint32_t wait_semaphore_count;
   struct radv_semaphore_part **signal_semaphores;
   uint32_t signal_semaphore_count;
   VkFence fence;

   uint64_t *wait_values;
   uint64_t *signal_values;

   struct radv_semaphore_part *temporary_semaphore_parts;
   uint32_t temporary_semaphore_part_count;

   struct list_head queue_pending_list;
   uint32_t submission_wait_count;
   struct radv_timeline_waiter *wait_nodes;

   struct list_head processing_list;
};

struct radv_queue_submission {
   const VkCommandBuffer *cmd_buffers;
   uint32_t cmd_buffer_count;

   /* Sparse bindings that happen on a queue. */
   const VkSparseBufferMemoryBindInfo *buffer_binds;
   uint32_t buffer_bind_count;
   const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
   uint32_t image_opaque_bind_count;
   const VkSparseImageMemoryBindInfo *image_binds;
   uint32_t image_bind_count;

   bool flush_caches;
   VkPipelineStageFlags wait_dst_stage_mask;
   const VkSemaphore *wait_semaphores;
   uint32_t wait_semaphore_count;
   const VkSemaphore *signal_semaphores;
   uint32_t signal_semaphore_count;
   VkFence fence;

   const uint64_t *wait_values;
   uint32_t wait_value_count;
   const uint64_t *signal_values;
   uint32_t signal_value_count;
};

static VkResult radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
                                              uint32_t decrement,
                                              struct list_head *processing_list);

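/* The deferred submission and all of the arrays it points at are carved out
 * of a single allocation, laid out in the order the pointers are assigned
 * below: the struct itself, cmd_buffers, buffer_binds, image_opaque_binds,
 * image_binds, the flattened VkSparseImageMemoryBind entries, wait/signal
 * semaphore pointers, temporary semaphore parts, wait/signal values and
 * finally the timeline wait nodes.
 */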
static VkResult
radv_create_deferred_submission(struct radv_queue *queue,
                                const struct radv_queue_submission *submission,
                                struct radv_deferred_queue_submission **out)
{
   struct radv_deferred_queue_submission *deferred = NULL;
   size_t size = sizeof(struct radv_deferred_queue_submission);

   uint32_t temporary_count = 0;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
         ++temporary_count;
   }

   size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
   size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
   size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
   size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);

   for (uint32_t i = 0; i < submission->image_bind_count; ++i)
      size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);

   size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
   size += temporary_count * sizeof(struct radv_semaphore_part);
   size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
   size += submission->wait_value_count * sizeof(uint64_t);
   size += submission->signal_value_count * sizeof(uint64_t);
   size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);

   deferred = calloc(1, size);
   if (!deferred)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   deferred->queue = queue;

   deferred->cmd_buffers = (void *)(deferred + 1);
   deferred->cmd_buffer_count = submission->cmd_buffer_count;
   if (submission->cmd_buffer_count) {
      memcpy(deferred->cmd_buffers, submission->cmd_buffers,
             submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
   }

   deferred->buffer_binds = (void *)(deferred->cmd_buffers + submission->cmd_buffer_count);
   deferred->buffer_bind_count = submission->buffer_bind_count;
   if (submission->buffer_bind_count) {
      memcpy(deferred->buffer_binds, submission->buffer_binds,
             submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
   }

   deferred->image_opaque_binds = (void *)(deferred->buffer_binds + submission->buffer_bind_count);
   deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
   if (submission->image_opaque_bind_count) {
      memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
             submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
   }

   deferred->image_binds =
      (void *)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
   deferred->image_bind_count = submission->image_bind_count;

   VkSparseImageMemoryBind *sparse_image_binds =
      (void *)(deferred->image_binds + deferred->image_bind_count);
   for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
      deferred->image_binds[i] = submission->image_binds[i];
      deferred->image_binds[i].pBinds = sparse_image_binds;

      for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
         *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
   }

   deferred->flush_caches = submission->flush_caches;
   deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;

   deferred->wait_semaphores = (void *)sparse_image_binds;
   deferred->wait_semaphore_count = submission->wait_semaphore_count;

   deferred->signal_semaphores =
      (void *)(deferred->wait_semaphores + deferred->wait_semaphore_count);
   deferred->signal_semaphore_count = submission->signal_semaphore_count;

   deferred->fence = submission->fence;

   deferred->temporary_semaphore_parts =
      (void *)(deferred->signal_semaphores + deferred->signal_semaphore_count);
   deferred->temporary_semaphore_part_count = temporary_count;

   uint32_t temporary_idx = 0;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
         deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
         deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
         semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
         ++temporary_idx;
      } else
         deferred->wait_semaphores[i] = &semaphore->permanent;
   }

   for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
         deferred->signal_semaphores[i] = &semaphore->temporary;
      } else {
         deferred->signal_semaphores[i] = &semaphore->permanent;
      }
   }

   deferred->wait_values = (void *)(deferred->temporary_semaphore_parts + temporary_count);
   if (submission->wait_value_count) {
      memcpy(deferred->wait_values, submission->wait_values,
             submission->wait_value_count * sizeof(uint64_t));
   }
   deferred->signal_values = deferred->wait_values + submission->wait_value_count;
   if (submission->signal_value_count) {
      memcpy(deferred->signal_values, submission->signal_values,
             submission->signal_value_count * sizeof(uint64_t));
   }

   deferred->wait_nodes = (void *)(deferred->signal_values + submission->signal_value_count);
   /* This is worst-case. radv_queue_enqueue_submission will adjust it further,
    * but this ensures the submission is not accidentally triggered early while
    * the wait timelines are still being added. */
   deferred->submission_wait_count = 1 + submission->wait_semaphore_count;

   *out = deferred;
   return VK_SUCCESS;
}

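/* Queue a deferred submission: register it as a waiter on every timeline
 * point that has not been submitted yet, append it to the queue's pending
 * list, and trigger it right away if nothing is left to wait for.
 */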
static VkResult
radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
                              struct list_head *processing_list)
{
   uint32_t wait_cnt = 0;
   struct radv_timeline_waiter *waiter = submission->wait_nodes;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
         if (submission->wait_semaphores[i]->timeline.highest_submitted <
             submission->wait_values[i]) {
            ++wait_cnt;
            waiter->value = submission->wait_values[i];
            waiter->submission = submission;
            list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
            ++waiter;
         }
         mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
      }
   }

   mtx_lock(&submission->queue->pending_mutex);

   bool is_first = list_is_empty(&submission->queue->pending_submissions);
   list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);

   mtx_unlock(&submission->queue->pending_mutex);

   /* If there is already a submission in the queue, that will decrement the counter by 1 when
    * submitted, but if the queue was empty, we decrement ourselves as there is no previous
    * submission. */
   uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);

   /* if decrement is zero, then we don't have a refcounted reference to the
    * submission anymore, so it is not safe to access the submission. */
   if (!decrement)
      return VK_SUCCESS;

   return radv_queue_trigger_submission(submission, decrement, processing_list);
}

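/* Remove a finished submission from the queue's pending list, kick off the
 * next one (if any) and wake threads blocked in timeline waits.
 */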
static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
                                   struct list_head *processing_list)
{
   mtx_lock(&submission->queue->pending_mutex);
   list_del(&submission->queue_pending_list);

   /* trigger the next submission in the queue. */
   if (!list_is_empty(&submission->queue->pending_submissions)) {
      struct radv_deferred_queue_submission *next_submission =
         list_first_entry(&submission->queue->pending_submissions,
                          struct radv_deferred_queue_submission, queue_pending_list);
      radv_queue_trigger_submission(next_submission, 1, processing_list);
   }
   mtx_unlock(&submission->queue->pending_mutex);

   u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
}

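/* Perform a deferred submission: build preambles and semaphore info, apply
 * sparse binds, then hand the command buffers to the winsys in chunks of at
 * most RADV_MAX_IBS_PER_SUBMIT (or one at a time when tracing is enabled).
 */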
static VkResult
radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
                           struct list_head *processing_list)
{
   struct radv_queue *queue = submission->queue;
   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
   uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
   bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
   bool can_patch = true;
   uint32_t advance;
   struct radv_winsys_sem_info sem_info = {0};
   VkResult result;
   struct radeon_cmdbuf *initial_preamble_cs = NULL;
   struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
   struct radeon_cmdbuf *continue_preamble_cs = NULL;

   result =
      radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,
                         &initial_flush_preamble_cs, &initial_preamble_cs, &continue_preamble_cs);
   if (result != VK_SUCCESS)
      goto fail;

   result = radv_alloc_sem_info(queue->device, &sem_info, submission->wait_semaphore_count,
                                submission->wait_semaphores, submission->wait_values,
                                submission->signal_semaphore_count, submission->signal_semaphores,
                                submission->signal_values, submission->fence);
   if (result != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
      result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
      result =
         radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
      result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (!submission->cmd_buffer_count) {
      result = queue->device->ws->cs_submit(ctx, queue->vk.index_in_family,
                                            &queue->device->empty_cs[queue->vk.queue_family_index], 1,
                                            NULL, NULL, &sem_info, false);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      struct radeon_cmdbuf **cs_array =
         malloc(sizeof(struct radeon_cmdbuf *) * (submission->cmd_buffer_count));
      if (!cs_array) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

4738       for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
4739          RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
4740          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4741 
4742          cs_array[j] = cmd_buffer->cs;
4743          if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4744             can_patch = false;
4745 
4746          cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4747       }
4748 
4749       for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
4750          struct radeon_cmdbuf *initial_preamble =
4751             (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
4752          advance = MIN2(max_cs_submission, submission->cmd_buffer_count - j);
4753 
4754          if (queue->device->trace_bo)
4755             *queue->device->trace_id_ptr = 0;
4756 
4757          sem_info.cs_emit_wait = j == 0;
4758          sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
4759 
4760          result = queue->device->ws->cs_submit(ctx, queue->vk.index_in_family, cs_array + j, advance,
4761                                                initial_preamble, continue_preamble_cs, &sem_info,
4762                                                can_patch);
4763          if (result != VK_SUCCESS) {
4764             free(cs_array);
4765             goto fail;
4766          }
4767 
4768          if (queue->device->trace_bo) {
4769             radv_check_gpu_hangs(queue, cs_array[j]);
4770          }
4771 
4772          if (queue->device->tma_bo) {
4773             radv_check_trap_handler(queue);
4774          }
4775       }
4776 
4777       free(cs_array);
4778    }
4779 
4780    radv_finalize_timelines(queue->device, submission->wait_semaphore_count,
4781                            submission->wait_semaphores, submission->wait_values,
4782                            submission->signal_semaphore_count, submission->signal_semaphores,
4783                            submission->signal_values, processing_list);
4784    /* This has to happen after timeline finalization to make sure the
4785     * condition variable is only triggered once both the timelines and the
4786     * queue have been updated. */
4787    radv_queue_submission_update_queue(submission, processing_list);
4788 
4789 fail:
4790    if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
4791       /* When something bad happened during the submission, such as
4792        * an out of memory issue, it might be hard to recover from
4793        * this inconsistent state. To avoid this sort of problem, we
4794        * assume that we are in a really bad situation and return
4795        * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
4796        * to submit the same job again to this device.
4797        */
4798       result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
4799    }
4800 
4801    radv_free_temp_syncobjs(queue->device, submission->temporary_semaphore_part_count,
4802                            submission->temporary_semaphore_parts);
4803    radv_free_sem_info(&sem_info);
4804    free(submission);
4805    return result;
4806 }
4807 
4808 static VkResult
4809 radv_process_submissions(struct list_head *processing_list)
4810 {
4811    while (!list_is_empty(processing_list)) {
4812       struct radv_deferred_queue_submission *submission =
4813          list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
4814       list_del(&submission->processing_list);
4815 
4816       VkResult result = radv_queue_submit_deferred(submission, processing_list);
4817       if (result != VK_SUCCESS)
4818          return result;
4819    }
4820    return VK_SUCCESS;
4821 }
4822 
4823 static VkResult
4824 wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
4825                                         uint64_t timeout)
4826 {
4827    struct radv_device *device = submission->queue->device;
4828    uint32_t syncobj_count = 0;
4829    uint32_t syncobj_idx = 0;
4830 
4831    for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4832       if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
4833          continue;
4834 
4835       if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
4836          continue;
4837       ++syncobj_count;
4838    }
4839 
4840    if (!syncobj_count)
4841       return VK_SUCCESS;
4842 
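   /* Single allocation for both arrays: the uint64_t wait points come
    * first, and the uint32_t syncobj handles are placed right after them
    * (see the pointer arithmetic below). */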
4843    uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
4844    if (!points)
4845       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4846 
4847    uint32_t *syncobj = (uint32_t *)(points + syncobj_count);
4848 
4849    for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4850       if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
4851          continue;
4852 
4853       if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
4854          continue;
4855 
4856       syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
4857       points[syncobj_idx] = submission->wait_values[i];
4858       ++syncobj_idx;
4859    }
4860 
4861    bool success = true;
4862    if (syncobj_idx > 0) {
4863       success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
4864                                                   true, timeout);
4865    }
4866 
4867    free(points);
4868    return success ? VK_SUCCESS : VK_TIMEOUT;
4869 }
4870 
4871 static int
4872 radv_queue_submission_thread_run(void *q)
4873 {
4874    struct radv_queue *queue = q;
4875 
4876    mtx_lock(&queue->thread_mutex);
4877    while (!p_atomic_read(&queue->thread_exit)) {
4878       struct radv_deferred_queue_submission *submission = queue->thread_submission;
4879       struct list_head processing_list;
4880       VkResult result = VK_SUCCESS;
4881       if (!submission) {
4882          u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
4883          continue;
4884       }
4885       mtx_unlock(&queue->thread_mutex);
4886 
4887       /* Wait at most 5 seconds so we have a chance to notice shutdown when
4888        * a semaphore never gets signaled. If it takes longer, we simply retry
4889        * the wait on the next iteration. */
4890       result =
4891          wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(5000000000));
4892       if (result != VK_SUCCESS) {
4893          mtx_lock(&queue->thread_mutex);
4894          continue;
4895       }
4896 
4897       /* The lock isn't held, but nobody will queue another submission
4898        * until we finish the current one. */
4899       p_atomic_set(&queue->thread_submission, NULL);
4900 
4901       list_inithead(&processing_list);
4902       list_addtail(&submission->processing_list, &processing_list);
4903       result = radv_process_submissions(&processing_list);
4904 
4905       mtx_lock(&queue->thread_mutex);
4906    }
4907    mtx_unlock(&queue->thread_mutex);
4908    return 0;
4909 }
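/* A rough sketch of the single-slot handoff implemented above and in
 * radv_queue_trigger_submission() below (illustrative, not extra driver
 * code):
 *
 *    trigger (submitting thread)         thread_run (worker)
 *    ---------------------------         -------------------------------
 *    lock(thread_mutex)                  lock(thread_mutex)
 *    queue->thread_submission = s        s = queue->thread_submission
 *    unlock(thread_mutex)                (wait on thread_cond if NULL)
 *    signal(thread_cond)                 unlock(thread_mutex)
 *                                        wait for timeline points
 *                                        thread_submission = NULL, run s
 *
 * At most one deferred submission is parked here at a time; see the
 * assert below.
 */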
4910 
4911 static VkResult
4912 radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission, uint32_t decrement,
4913                               struct list_head *processing_list)
4914 {
4915    struct radv_queue *queue = submission->queue;
4916    int ret;
4917    if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
4918       return VK_SUCCESS;
4919 
4920    if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) ==
4921        VK_SUCCESS) {
4922       list_addtail(&submission->processing_list, processing_list);
4923       return VK_SUCCESS;
4924    }
4925 
4926    mtx_lock(&queue->thread_mutex);
4927 
4928    /* A submission can only be ready for the thread if it doesn't have
4929     * any predecessors in the same queue, so there can only be one such
4930     * submission at a time. */
4931    assert(queue->thread_submission == NULL);
4932 
4933    /* Only start the thread on demand to save resources for the many games
4934     * which only use binary semaphores. */
4935    if (!queue->thread_running) {
4936       ret = thrd_create(&queue->submission_thread, radv_queue_submission_thread_run, queue);
4937       if (ret) {
4938          mtx_unlock(&queue->thread_mutex);
4939          return vk_errorf(queue, VK_ERROR_DEVICE_LOST,
4940                           "Failed to start submission thread");
4941       }
4942       queue->thread_running = true;
4943    }
4944 
4945    queue->thread_submission = submission;
4946    mtx_unlock(&queue->thread_mutex);
4947 
4948    u_cnd_monotonic_signal(&queue->thread_cond);
4949    return VK_SUCCESS;
4950 }
4951 
4952 static VkResult
4953 radv_queue_submit(struct radv_queue *queue, const struct radv_queue_submission *submission)
4954 {
4955    struct radv_deferred_queue_submission *deferred = NULL;
4956 
4957    VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
4958    if (result != VK_SUCCESS)
4959       return result;
4960 
4961    struct list_head processing_list;
4962    list_inithead(&processing_list);
4963 
4964    result = radv_queue_enqueue_submission(deferred, &processing_list);
4965    if (result != VK_SUCCESS) {
4966       /* If anything is in the list at this point, we leak it. */
4967       assert(list_is_empty(&processing_list));
4968       return result;
4969    }
4970    return radv_process_submissions(&processing_list);
4971 }
4972 
4973 bool
4974 radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
4975 {
4976    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4977    struct radv_winsys_sem_info sem_info = {0};
4978    VkResult result;
4979 
4980    result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);
4981    if (result != VK_SUCCESS)
4982       return false;
4983 
4984    result =
4985       queue->device->ws->cs_submit(ctx, queue->vk.index_in_family, &cs, 1,
4986                                    NULL, NULL, &sem_info, false);
4987    radv_free_sem_info(&sem_info);
4988    if (result != VK_SUCCESS)
4989       return false;
4990 
4991    return true;
4992 }
4993 
4994 /* Signals fence as soon as all the work currently put on queue is done. */
4995 static VkResult
4996 radv_signal_fence(struct radv_queue *queue, VkFence fence)
4997 {
4998    return radv_queue_submit(queue, &(struct radv_queue_submission){.fence = fence});
4999 }
5000 
5001 static bool
5002 radv_submit_has_effects(const VkSubmitInfo *info)
5003 {
5004    return info->commandBufferCount || info->waitSemaphoreCount || info->signalSemaphoreCount;
5005 }
5006 
5007 VkResult
5008 radv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence)
5009 {
5010    RADV_FROM_HANDLE(radv_queue, queue, _queue);
5011    VkResult result;
5012    uint32_t fence_idx = 0;
5013    bool flushed_caches = false;
5014 
5015    if (radv_device_is_lost(queue->device))
5016       return VK_ERROR_DEVICE_LOST;
5017 
5018    if (fence != VK_NULL_HANDLE) {
5019       for (uint32_t i = 0; i < submitCount; ++i)
5020          if (radv_submit_has_effects(pSubmits + i))
5021             fence_idx = i;
5022    } else
5023       fence_idx = UINT32_MAX;
5024 
5025    for (uint32_t i = 0; i < submitCount; i++) {
5026       if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
5027          continue;
5028 
5029       VkPipelineStageFlags wait_dst_stage_mask = 0;
5030       for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
5031          wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
5032       }
5033 
5034       const VkTimelineSemaphoreSubmitInfo *timeline_info =
5035          vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
5036 
5037       result = radv_queue_submit(
5038          queue, &(struct radv_queue_submission){
5039                    .cmd_buffers = pSubmits[i].pCommandBuffers,
5040                    .cmd_buffer_count = pSubmits[i].commandBufferCount,
5041                    .wait_dst_stage_mask = wait_dst_stage_mask,
5042                    .flush_caches = !flushed_caches,
5043                    .wait_semaphores = pSubmits[i].pWaitSemaphores,
5044                    .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
5045                    .signal_semaphores = pSubmits[i].pSignalSemaphores,
5046                    .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
5047                    .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
5048                    .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
5049                    .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
5050                                           ? timeline_info->waitSemaphoreValueCount
5051                                           : 0,
5052                    .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
5053                    .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
5054                                             ? timeline_info->signalSemaphoreValueCount
5055                                             : 0,
5056                 });
5057       if (result != VK_SUCCESS)
5058          return result;
5059 
5060       flushed_caches = true;
5061    }
5062 
5063    if (fence != VK_NULL_HANDLE && !submitCount) {
5064       result = radv_signal_fence(queue, fence);
5065       if (result != VK_SUCCESS)
5066          return result;
5067    }
5068 
5069    return VK_SUCCESS;
5070 }
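/* Illustrative example (values are hypothetical): for
 * vkQueueSubmit(queue, 3, pSubmits, fence) where only pSubmits[0] and
 * pSubmits[2] carry work, fence_idx above resolves to 2, so the fence is
 * attached to the last submission that has any effect and is signaled
 * only once all preceding work completes. */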
5071 
5072 static const char *
5073 radv_get_queue_family_name(struct radv_queue *queue)
5074 {
5075    switch (queue->vk.queue_family_index) {
5076    case RADV_QUEUE_GENERAL:
5077       return "graphics";
5078    case RADV_QUEUE_COMPUTE:
5079       return "compute";
5080    case RADV_QUEUE_TRANSFER:
5081       return "transfer";
5082    default:
5083       unreachable("Unknown queue family");
5084    }
5085 }
5086 
5087 VkResult
5088 radv_QueueWaitIdle(VkQueue _queue)
5089 {
5090    RADV_FROM_HANDLE(radv_queue, queue, _queue);
5091 
5092    if (radv_device_is_lost(queue->device))
5093       return VK_ERROR_DEVICE_LOST;
5094 
5095    mtx_lock(&queue->pending_mutex);
5096    while (!list_is_empty(&queue->pending_submissions)) {
5097       u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
5098    }
5099    mtx_unlock(&queue->pending_mutex);
5100 
5101    if (!queue->device->ws->ctx_wait_idle(
5102           queue->hw_ctx, radv_queue_family_to_ring(queue->vk.queue_family_index),
5103           queue->vk.index_in_family)) {
5104       return radv_device_set_lost(queue->device,
5105                                   "Failed to wait for a '%s' queue "
5106                                   "to be idle. GPU hang?",
5107                                   radv_get_queue_family_name(queue));
5108    }
5109 
5110    return VK_SUCCESS;
5111 }
5112 
5113 VkResult
5114 radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
5115                                           VkExtensionProperties *pProperties)
5116 {
5117    if (pLayerName)
5118       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
5119 
5120    return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
5121                                                      pPropertyCount, pProperties);
5122 }
5123 
5124 PFN_vkVoidFunction
5125 radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
5126 {
5127    RADV_FROM_HANDLE(radv_instance, instance, _instance);
5128 
5129    /* The Vulkan 1.0 spec for vkGetInstanceProcAddr contains a table that
5130     * specifies exactly when we must return a valid function pointer, when
5131     * we must return NULL, and when the behavior is left undefined.
5132     */
5133    if (pName == NULL)
5134       return NULL;
5135 
5136 #define LOOKUP_RADV_ENTRYPOINT(entrypoint)                                                         \
5137    if (strcmp(pName, "vk" #entrypoint) == 0)                                                       \
5138    return (PFN_vkVoidFunction)radv_##entrypoint
5139 
5140    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
5141    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
5142    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
5143    LOOKUP_RADV_ENTRYPOINT(CreateInstance);
5144 
5145    /* GetInstanceProcAddr() can also be called with a NULL instance.
5146     * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
5147     */
5148    LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
5149 
5150 #undef LOOKUP_RADV_ENTRYPOINT
5151 
5152    if (instance == NULL)
5153       return NULL;
5154 
5155    return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
5156 }
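/* Illustrative usage from a loader or test harness (not driver code):
 *
 *    PFN_vkCreateInstance create_instance = (PFN_vkCreateInstance)
 *       radv_GetInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance");
 *
 * Pre-instance entrypoints such as vkCreateInstance must resolve even
 * with a NULL instance, which is why the LOOKUP_RADV_ENTRYPOINT checks
 * above run before the instance NULL check. */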
5157 
5158 /* Windows will use a dll definition file to avoid build errors. */
5159 #ifdef _WIN32
5160 #undef PUBLIC
5161 #define PUBLIC
5162 #endif
5163 
5164 /* The loader wants us to expose a second GetInstanceProcAddr function
5165  * to work around certain LD_PRELOAD issues seen in apps.
5166  */
5167 PUBLIC
5168 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5169 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
5170 {
5171    return radv_GetInstanceProcAddr(instance, pName);
5172 }
5173 
5174 PUBLIC
5175 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5176 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
5177 {
5178    RADV_FROM_HANDLE(radv_instance, instance, _instance);
5179    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
5180 }
5181 
5182 bool
5183 radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
5184 {
5185    /* Only set BO metadata for the first plane */
5186    if (memory->image && memory->image->offset == 0) {
5187       struct radeon_bo_metadata metadata;
5188       radv_init_metadata(device, memory->image, &metadata);
5189       device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
5190    }
5191 
5192    return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
5193 }
5194 
5195 void
5196 radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
5197                         struct radeon_winsys_bo *bo)
5198 {
5199    memset(mem, 0, sizeof(*mem));
5200    vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
5201 
5202    mem->bo = bo;
5203 }
5204 
5205 void
5206 radv_device_memory_finish(struct radv_device_memory *mem)
5207 {
5208    vk_object_base_finish(&mem->base);
5209 }
5210 
5211 void
5212 radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5213                  struct radv_device_memory *mem)
5214 {
5215    if (mem == NULL)
5216       return;
5217 
5218 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5219    if (mem->android_hardware_buffer)
5220       AHardwareBuffer_release(mem->android_hardware_buffer);
5221 #endif
5222 
5223    if (mem->bo) {
5224       if (device->overallocation_disallowed) {
5225          mtx_lock(&device->overallocation_mutex);
5226          device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
5227          mtx_unlock(&device->overallocation_mutex);
5228       }
5229 
5230       if (device->use_global_bo_list)
5231          device->ws->buffer_make_resident(device->ws, mem->bo, false);
5232       device->ws->buffer_destroy(device->ws, mem->bo);
5233       mem->bo = NULL;
5234    }
5235 
5236    radv_device_memory_finish(mem);
5237    vk_free2(&device->vk.alloc, pAllocator, mem);
5238 }
5239 
5240 static VkResult
5241 radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
5242                   const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
5243 {
5244    struct radv_device_memory *mem;
5245    VkResult result;
5246    enum radeon_bo_domain domain;
5247    uint32_t flags = 0;
5248 
5249    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
5250 
5251    const VkImportMemoryFdInfoKHR *import_info =
5252       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
5253    const VkMemoryDedicatedAllocateInfo *dedicate_info =
5254       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
5255    const VkExportMemoryAllocateInfo *export_info =
5256       vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
5257    const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
5258       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
5259    const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
5260       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
5261 
5262    const struct wsi_memory_allocate_info *wsi_info =
5263       vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
5264 
5265    if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
5266        !(export_info && (export_info->handleTypes &
5267                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
5268       /* Apparently, this is allowed */
5269       *pMem = VK_NULL_HANDLE;
5270       return VK_SUCCESS;
5271    }
5272 
5273    mem =
5274       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5275    if (mem == NULL)
5276       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5277 
5278    radv_device_memory_init(mem, device, NULL);
5279 
5280    if (wsi_info) {
5281       if (wsi_info->implicit_sync)
5282          flags |= RADEON_FLAG_IMPLICIT_SYNC;
5283 
5284       /* In the PRIME case, a linear buffer is allocated in the default
5285        * heap, which is VRAM. When the display is connected to the iGPU while
5286        * rendering happens on the dGPU, the DDX function
5287        * amdgpu_present_check_flip() then fails and presentation falls back to
5288        * a blit instead of a flip. Setting RADEON_FLAG_GTT_WC lets the kernel
5289        * allocate GTT memory on hardware that supports scanning out directly
5290        * from GTT. The wsi_info check ensures the flag is only set for memory
5291        * allocated by the driver itself. */
5292       flags |= RADEON_FLAG_GTT_WC;
5293    }
5294 
5295    if (dedicate_info) {
5296       mem->image = radv_image_from_handle(dedicate_info->image);
5297       mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
5298    } else {
5299       mem->image = NULL;
5300       mem->buffer = NULL;
5301    }
5302 
5303    float priority_float = 0.5;
5304    const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
5305       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
5306    if (priority_ext)
5307       priority_float = priority_ext->priority;
5308 
5309    uint64_t replay_address = 0;
5310    const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
5311       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
5312    if (replay_info && replay_info->opaqueCaptureAddress)
5313       replay_address = replay_info->opaqueCaptureAddress;
5314 
5315    unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
5316                             (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
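   /* Maps the [0.0, 1.0] VK_EXT_memory_priority value onto the integer
    * range [0, RADV_BO_PRIORITY_APPLICATION_MAX - 1] used by the winsys;
    * the default of 0.5 lands in the middle of that range. */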
5317 
5318    mem->user_ptr = NULL;
5319 
5320 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5321    mem->android_hardware_buffer = NULL;
5322 #endif
5323 
5324    if (ahb_import_info) {
5325       result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
5326       if (result != VK_SUCCESS)
5327          goto fail;
5328    } else if (export_info && (export_info->handleTypes &
5329                               VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
5330       result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
5331       if (result != VK_SUCCESS)
5332          goto fail;
5333    } else if (import_info) {
5334       assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5335              import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5336       result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL);
5337       if (result != VK_SUCCESS) {
5338          goto fail;
5339       } else {
5340          close(import_info->fd);
5341       }
5342 
5343       if (mem->image && mem->image->plane_count == 1 &&
5344           !vk_format_is_depth_or_stencil(mem->image->vk_format) && mem->image->info.samples == 1 &&
5345           mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
5346          struct radeon_bo_metadata metadata;
5347          device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
5348 
5349          struct radv_image_create_info create_info = {.no_metadata_planes = true,
5350                                                       .bo_metadata = &metadata};
5351 
5352          /* This gives a basic ability to import radeonsi images
5353           * that don't have DCC. This is not guaranteed by any
5354           * spec and can be removed after we support modifiers. */
5355          result = radv_image_create_layout(device, create_info, NULL, mem->image);
5356          if (result != VK_SUCCESS) {
5357             device->ws->buffer_destroy(device->ws, mem->bo);
5358             goto fail;
5359          }
5360       }
5361    } else if (host_ptr_info) {
5362       assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
5363       result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
5364                                            pAllocateInfo->allocationSize, priority, &mem->bo);
5365       if (result != VK_SUCCESS) {
5366          goto fail;
5367       } else {
5368          mem->user_ptr = host_ptr_info->pHostPointer;
5369       }
5370    } else {
5371       uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
5372       uint32_t heap_index;
5373 
5374       heap_index =
5375          device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
5376             .heapIndex;
5377       domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
5378       flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
5379 
5380       if (!import_info && (!export_info || !export_info->handleTypes)) {
5381          flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
5382          if (device->use_global_bo_list) {
5383             flags |= RADEON_FLAG_PREFER_LOCAL_BO;
5384          }
5385       }
5386 
5387       const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
5388       if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
5389          flags |= RADEON_FLAG_REPLAYABLE;
5390 
5391       if (device->overallocation_disallowed) {
5392          uint64_t total_size =
5393             device->physical_device->memory_properties.memoryHeaps[heap_index].size;
5394 
5395          mtx_lock(&device->overallocation_mutex);
5396          if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
5397             mtx_unlock(&device->overallocation_mutex);
5398             result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5399             goto fail;
5400          }
5401          device->allocated_memory_size[heap_index] += alloc_size;
5402          mtx_unlock(&device->overallocation_mutex);
5403       }
5404 
5405       result = device->ws->buffer_create(device->ws, alloc_size,
5406                                          device->physical_device->rad_info.max_alignment, domain,
5407                                          flags, priority, replay_address, &mem->bo);
5408 
5409       if (result != VK_SUCCESS) {
5410          if (device->overallocation_disallowed) {
5411             mtx_lock(&device->overallocation_mutex);
5412             device->allocated_memory_size[heap_index] -= alloc_size;
5413             mtx_unlock(&device->overallocation_mutex);
5414          }
5415          goto fail;
5416       }
5417 
5418       mem->heap_index = heap_index;
5419       mem->alloc_size = alloc_size;
5420    }
5421 
5422    if (!wsi_info) {
5423       if (device->use_global_bo_list) {
5424          result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
5425          if (result != VK_SUCCESS)
5426             goto fail;
5427       }
5428    }
5429 
5430    *pMem = radv_device_memory_to_handle(mem);
5431 
5432    return VK_SUCCESS;
5433 
5434 fail:
5435    radv_free_memory(device, pAllocator, mem);
5436 
5437    return result;
5438 }
5439 
5440 VkResult
5441 radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
5442                     const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
5443 {
5444    RADV_FROM_HANDLE(radv_device, device, _device);
5445    return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
5446 }
5447 
5448 void
5449 radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
5450 {
5451    RADV_FROM_HANDLE(radv_device, device, _device);
5452    RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
5453 
5454    radv_free_memory(device, pAllocator, mem);
5455 }
5456 
5457 VkResult
5458 radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
5459                VkMemoryMapFlags flags, void **ppData)
5460 {
5461    RADV_FROM_HANDLE(radv_device, device, _device);
5462    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5463 
5464    if (mem == NULL) {
5465       *ppData = NULL;
5466       return VK_SUCCESS;
5467    }
5468 
5469    if (mem->user_ptr)
5470       *ppData = mem->user_ptr;
5471    else
5472       *ppData = device->ws->buffer_map(mem->bo);
5473 
5474    if (*ppData) {
5475       *ppData = (uint8_t *)*ppData + offset;
5476       return VK_SUCCESS;
5477    }
5478 
5479    return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
5480 }
5481 
5482 void
5483 radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
5484 {
5485    RADV_FROM_HANDLE(radv_device, device, _device);
5486    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5487 
5488    if (mem == NULL)
5489       return;
5490 
5491    if (mem->user_ptr == NULL)
5492       device->ws->buffer_unmap(mem->bo);
5493 }
5494 
5495 VkResult
5496 radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
5497                              const VkMappedMemoryRange *pMemoryRanges)
5498 {
5499    return VK_SUCCESS;
5500 }
5501 
5502 VkResult
5503 radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
5504                                   const VkMappedMemoryRange *pMemoryRanges)
5505 {
5506    return VK_SUCCESS;
5507 }
5508 
5509 static void
5510 radv_get_buffer_memory_requirements(struct radv_device *device,
5511                                     VkDeviceSize size,
5512                                     VkBufferCreateFlags flags,
5513                                     VkMemoryRequirements2 *pMemoryRequirements)
5514 {
5515    pMemoryRequirements->memoryRequirements.memoryTypeBits =
5516       (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5517 
5518    if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
5519       pMemoryRequirements->memoryRequirements.alignment = 4096;
5520    else
5521       pMemoryRequirements->memoryRequirements.alignment = 16;
5522 
5523    pMemoryRequirements->memoryRequirements.size =
5524       align64(size, pMemoryRequirements->memoryRequirements.alignment);
5525 
5526    vk_foreach_struct(ext, pMemoryRequirements->pNext)
5527    {
5528       switch (ext->sType) {
5529       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5530          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5531          req->requiresDedicatedAllocation = false;
5532          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5533          break;
5534       }
5535       default:
5536          break;
5537       }
5538    }
5539 }
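/* Worked example (hypothetical sizes): a 100-byte buffer without sparse
 * binding gets alignment 16 and size align64(100, 16) = 112; with
 * VK_BUFFER_CREATE_SPARSE_BINDING_BIT the alignment is the 4096-byte
 * sparse page size, so the reported size becomes 4096. */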
5540 
5541 void
5542 radv_GetBufferMemoryRequirements2(VkDevice _device, const VkBufferMemoryRequirementsInfo2 *pInfo,
5543                                   VkMemoryRequirements2 *pMemoryRequirements)
5544 {
5545    RADV_FROM_HANDLE(radv_device, device, _device);
5546    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5547 
5548    radv_get_buffer_memory_requirements(device, buffer->size, buffer->flags, pMemoryRequirements);
5549 }
5550 
5551 void
5552 radv_GetDeviceBufferMemoryRequirementsKHR(VkDevice _device,
5553                                           const VkDeviceBufferMemoryRequirementsKHR* pInfo,
5554                                           VkMemoryRequirements2 *pMemoryRequirements)
5555 {
5556    RADV_FROM_HANDLE(radv_device, device, _device);
5557 
5558    radv_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pInfo->pCreateInfo->flags,
5559                                        pMemoryRequirements);
5560 }
5561 
5562 void
5563 radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
5564                                  VkMemoryRequirements2 *pMemoryRequirements)
5565 {
5566    RADV_FROM_HANDLE(radv_device, device, _device);
5567    RADV_FROM_HANDLE(radv_image, image, pInfo->image);
5568 
5569    pMemoryRequirements->memoryRequirements.memoryTypeBits =
5570       (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5571 
5572    pMemoryRequirements->memoryRequirements.size = image->size;
5573    pMemoryRequirements->memoryRequirements.alignment = image->alignment;
5574 
5575    vk_foreach_struct(ext, pMemoryRequirements->pNext)
5576    {
5577       switch (ext->sType) {
5578       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5579          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5580          req->requiresDedicatedAllocation =
5581             image->shareable && image->tiling != VK_IMAGE_TILING_LINEAR;
5582          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5583          break;
5584       }
5585       default:
5586          break;
5587       }
5588    }
5589 }
5590 
5591 void
5592 radv_GetDeviceImageMemoryRequirementsKHR(VkDevice device,
5593                                          const VkDeviceImageMemoryRequirementsKHR *pInfo,
5594                                          VkMemoryRequirements2 *pMemoryRequirements)
5595 {
5596    UNUSED VkResult result;
5597    VkImage image;
5598 
5599    /* Determining the image size/alignment requires creating a surface,
5600     * which is hard to do without creating a full image.
5601     * TODO: Avoid creating an image.
5602     */
5603    result = radv_CreateImage(device, pInfo->pCreateInfo, NULL, &image);
5604    assert(result == VK_SUCCESS);
5605 
5606    VkImageMemoryRequirementsInfo2 info2 = {
5607       .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
5608       .image = image,
5609    };
5610 
5611    radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements);
5612 
5613    radv_DestroyImage(device, image, NULL);
5614 }
5615 
5616 void
5617 radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
5618                                VkDeviceSize *pCommittedMemoryInBytes)
5619 {
5620    *pCommittedMemoryInBytes = 0;
5621 }
5622 
5623 VkResult
5624 radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
5625                        const VkBindBufferMemoryInfo *pBindInfos)
5626 {
5627    RADV_FROM_HANDLE(radv_device, device, _device);
5628 
5629    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5630       RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5631       RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5632 
5633       if (mem) {
5634          if (mem->alloc_size) {
5635             VkBufferMemoryRequirementsInfo2 info = {
5636                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
5637                .buffer = pBindInfos[i].buffer,
5638             };
5639             VkMemoryRequirements2 reqs = {
5640                .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
5641             };
5642 
5643             radv_GetBufferMemoryRequirements2(_device, &info, &reqs);
5644 
5645             if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
5646                return vk_errorf(device, VK_ERROR_UNKNOWN,
5647                                 "Device memory object too small for the buffer.\n");
5648             }
5649          }
5650 
5651          buffer->bo = mem->bo;
5652          buffer->offset = pBindInfos[i].memoryOffset;
5653       } else {
5654          buffer->bo = NULL;
5655       }
5656    }
5657    return VK_SUCCESS;
5658 }
5659 
5660 VkResult
5661 radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,
5662                       const VkBindImageMemoryInfo *pBindInfos)
5663 {
5664    RADV_FROM_HANDLE(radv_device, device, _device);
5665 
5666    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5667       RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5668       RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5669 
5670       if (mem) {
5671          if (mem->alloc_size) {
5672             VkImageMemoryRequirementsInfo2 info = {
5673                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
5674                .image = pBindInfos[i].image,
5675             };
5676             VkMemoryRequirements2 reqs = {
5677                .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
5678             };
5679 
5680             radv_GetImageMemoryRequirements2(_device, &info, &reqs);
5681 
5682             if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
5683                return vk_errorf(device, VK_ERROR_UNKNOWN,
5684                                 "Device memory object too small for the image.\n");
5685             }
5686          }
5687 
5688          image->bo = mem->bo;
5689          image->offset = pBindInfos[i].memoryOffset;
5690       } else {
5691          image->bo = NULL;
5692          image->offset = 0;
5693       }
5694    }
5695    return VK_SUCCESS;
5696 }
5697 
5698 static bool
5699 radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
5700 {
5701    return info->bufferBindCount || info->imageOpaqueBindCount || info->imageBindCount ||
5702           info->waitSemaphoreCount || info->signalSemaphoreCount;
5703 }
5704 
5705 VkResult
5706 radv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo,
5707                      VkFence fence)
5708 {
5709    RADV_FROM_HANDLE(radv_queue, queue, _queue);
5710    uint32_t fence_idx = 0;
5711 
5712    if (radv_device_is_lost(queue->device))
5713       return VK_ERROR_DEVICE_LOST;
5714 
5715    if (fence != VK_NULL_HANDLE) {
5716       for (uint32_t i = 0; i < bindInfoCount; ++i)
5717          if (radv_sparse_bind_has_effects(pBindInfo + i))
5718             fence_idx = i;
5719    } else
5720       fence_idx = UINT32_MAX;
5721 
5722    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5723       if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
5724          continue;
5725 
5726       const VkTimelineSemaphoreSubmitInfo *timeline_info =
5727          vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
5728 
5729       VkResult result = radv_queue_submit(
5730          queue, &(struct radv_queue_submission){
5731                    .buffer_binds = pBindInfo[i].pBufferBinds,
5732                    .buffer_bind_count = pBindInfo[i].bufferBindCount,
5733                    .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
5734                    .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
5735                    .image_binds = pBindInfo[i].pImageBinds,
5736                    .image_bind_count = pBindInfo[i].imageBindCount,
5737                    .wait_semaphores = pBindInfo[i].pWaitSemaphores,
5738                    .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
5739                    .signal_semaphores = pBindInfo[i].pSignalSemaphores,
5740                    .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
5741                    .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
5742                    .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
5743                    .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
5744                                           ? timeline_info->waitSemaphoreValueCount
5745                                           : 0,
5746                    .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
5747                    .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
5748                                             ? timeline_info->signalSemaphoreValueCount
5749                                             : 0,
5750                 });
5751 
5752       if (result != VK_SUCCESS)
5753          return result;
5754    }
5755 
5756    if (fence != VK_NULL_HANDLE && !bindInfoCount) {
5757       VkResult result = radv_signal_fence(queue, fence);
5758       if (result != VK_SUCCESS)
5759          return result;
5760    }
5761 
5762    return VK_SUCCESS;
5763 }
5764 
5765 static void
5766 radv_destroy_fence_part(struct radv_device *device, struct radv_fence_part *part)
5767 {
5768    if (part->kind != RADV_FENCE_NONE)
5769       device->ws->destroy_syncobj(device->ws, part->syncobj);
5770    part->kind = RADV_FENCE_NONE;
5771 }
5772 
5773 static void
5774 radv_destroy_fence(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5775                    struct radv_fence *fence)
5776 {
5777    radv_destroy_fence_part(device, &fence->temporary);
5778    radv_destroy_fence_part(device, &fence->permanent);
5779 
5780    vk_object_base_finish(&fence->base);
5781    vk_free2(&device->vk.alloc, pAllocator, fence);
5782 }
5783 
5784 VkResult
5785 radv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo,
5786                  const VkAllocationCallbacks *pAllocator, VkFence *pFence)
5787 {
5788    RADV_FROM_HANDLE(radv_device, device, _device);
5789    bool create_signaled = false;
5790    struct radv_fence *fence;
5791    int ret;
5792 
5793    fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
5794                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5795    if (!fence)
5796       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5797 
5798    vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
5799 
5800    fence->permanent.kind = RADV_FENCE_SYNCOBJ;
5801 
5802    if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
5803       create_signaled = true;
5804 
5805    ret = device->ws->create_syncobj(device->ws, create_signaled, &fence->permanent.syncobj);
5806    if (ret) {
5807       radv_destroy_fence(device, pAllocator, fence);
5808       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5809    }
5810 
5811    *pFence = radv_fence_to_handle(fence);
5812 
5813    return VK_SUCCESS;
5814 }
5815 
5816 void
5817 radv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator)
5818 {
5819    RADV_FROM_HANDLE(radv_device, device, _device);
5820    RADV_FROM_HANDLE(radv_fence, fence, _fence);
5821 
5822    if (!fence)
5823       return;
5824 
5825    radv_destroy_fence(device, pAllocator, fence);
5826 }
5827 
5828 VkResult
5829 radv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll,
5830                    uint64_t timeout)
5831 {
5832    RADV_FROM_HANDLE(radv_device, device, _device);
5833    uint32_t *handles;
5834 
5835    if (radv_device_is_lost(device))
5836       return VK_ERROR_DEVICE_LOST;
5837 
5838    timeout = radv_get_absolute_timeout(timeout);
5839 
5840    handles = malloc(sizeof(uint32_t) * fenceCount);
5841    if (!handles)
5842       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5843 
5844    for (uint32_t i = 0; i < fenceCount; ++i) {
5845       RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5846 
5847       struct radv_fence_part *part =
5848          fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
5849 
5850       assert(part->kind == RADV_FENCE_SYNCOBJ);
5851       handles[i] = part->syncobj;
5852    }
5853 
5854    bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
5855    free(handles);
5856    return success ? VK_SUCCESS : VK_TIMEOUT;
5857 }
5858 
5859 VkResult
5860 radv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
5861 {
5862    RADV_FROM_HANDLE(radv_device, device, _device);
5863 
5864    for (unsigned i = 0; i < fenceCount; ++i) {
5865       RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5866 
5867       /* From the Vulkan 1.0.53 spec:
5868        *
5869        *    "If any member of pFences currently has its payload
5870        *    imported with temporary permanence, that fence’s prior
5871        *    permanent payload is first restored. The remaining
5872        *    operations described therefore operate on the restored
5873        *    payload."
5874        */
5875       if (fence->temporary.kind != RADV_FENCE_NONE)
5876          radv_destroy_fence_part(device, &fence->temporary);
5877 
5878       device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
5879    }
5880 
5881    return VK_SUCCESS;
5882 }
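/* Example of the rule quoted above: a payload imported with
 * VK_FENCE_IMPORT_TEMPORARY_BIT only populates fence->temporary, so
 * destroying that part restores the permanent payload, and the permanent
 * syncobj is then reset as usual. */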
5883 
5884 VkResult
5885 radv_GetFenceStatus(VkDevice _device, VkFence _fence)
5886 {
5887    RADV_FROM_HANDLE(radv_device, device, _device);
5888    RADV_FROM_HANDLE(radv_fence, fence, _fence);
5889 
5890    struct radv_fence_part *part =
5891       fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
5892 
5893    if (radv_device_is_lost(device))
5894       return VK_ERROR_DEVICE_LOST;
5895 
5896    bool success = device->ws->wait_syncobj(device->ws, &part->syncobj, 1, true, 0);
5897    return success ? VK_SUCCESS : VK_NOT_READY;
5898 }
5899 
5900 /* Queue semaphore functions */
5901 
5902 static void
5903 radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
5904 {
5905    timeline->highest_signaled = value;
5906    timeline->highest_submitted = value;
5907    list_inithead(&timeline->points);
5908    list_inithead(&timeline->free_points);
5909    list_inithead(&timeline->waiters);
5910    mtx_init(&timeline->mutex, mtx_plain);
5911 }
5912 
5913 static void
5914 radv_destroy_timeline(struct radv_device *device, struct radv_timeline *timeline)
5915 {
5916    list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->free_points, list)
5917    {
5918       list_del(&point->list);
5919       device->ws->destroy_syncobj(device->ws, point->syncobj);
5920       free(point);
5921    }
5922    list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
5923    {
5924       list_del(&point->list);
5925       device->ws->destroy_syncobj(device->ws, point->syncobj);
5926       free(point);
5927    }
5928    mtx_destroy(&timeline->mutex);
5929 }
5930 
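/* The points list is kept sorted by value (see
 * radv_timeline_add_point_locked), so GC can walk from the front, stop at
 * the first point that still has waiters or has not been submitted yet,
 * and recycle fully signaled points onto the free list while advancing
 * highest_signaled. */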
5931 static void
5932 radv_timeline_gc_locked(struct radv_device *device, struct radv_timeline *timeline)
5933 {
5934    list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
5935    {
5936       if (point->wait_count || point->value > timeline->highest_submitted)
5937          return;
5938 
5939       if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
5940          timeline->highest_signaled = point->value;
5941          list_del(&point->list);
5942          list_add(&point->list, &timeline->free_points);
5943       }
5944    }
5945 }
5946 
5947 static struct radv_timeline_point *
5948 radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
5949                                          uint64_t p)
5950 {
5951    radv_timeline_gc_locked(device, timeline);
5952 
5953    if (p <= timeline->highest_signaled)
5954       return NULL;
5955 
5956    list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
5957    {
5958       if (point->value >= p) {
5959          ++point->wait_count;
5960          return point;
5961       }
5962    }
5963    return NULL;
5964 }
5965 
5966 static struct radv_timeline_point *
5967 radv_timeline_add_point_locked(struct radv_device *device, struct radv_timeline *timeline,
5968                                uint64_t p)
5969 {
5970    radv_timeline_gc_locked(device, timeline);
5971 
5972    struct radv_timeline_point *ret = NULL;
5973    struct radv_timeline_point *prev = NULL;
5974    int r;
5975 
5976    if (p <= timeline->highest_signaled)
5977       return NULL;
5978 
5979    list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
5980    {
5981       if (point->value == p) {
5982          return NULL;
5983       }
5984 
5985       if (point->value < p)
5986          prev = point;
5987    }
5988 
5989    if (list_is_empty(&timeline->free_points)) {
5990       ret = malloc(sizeof(struct radv_timeline_point));
      if (!ret)
         return NULL;
5991       r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
5992       if (r) {
5993          free(ret);
5994          return NULL;
5995       }
5996    } else {
5997       ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
5998       list_del(&ret->list);
5999 
6000       device->ws->reset_syncobj(device->ws, ret->syncobj);
6001    }
6002 
6003    ret->value = p;
6004    ret->wait_count = 1;
6005 
6006    if (prev) {
6007       list_add(&ret->list, &prev->list);
6008    } else {
6009       list_addtail(&ret->list, &timeline->points);
6010    }
6011    return ret;
6012 }
6013 
6014 static VkResult
6015 radv_timeline_wait(struct radv_device *device, struct radv_timeline *timeline, uint64_t value,
6016                    uint64_t abs_timeout)
6017 {
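   /* Two phases: first wait (on the device-wide condition variable) until
    * a submission reaching `value` exists at all, then wait on the
    * matching point's syncobj for actual GPU completion. */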
6018    mtx_lock(&timeline->mutex);
6019 
6020    while (timeline->highest_submitted < value) {
6021       struct timespec abstime;
6022       timespec_from_nsec(&abstime, abs_timeout);
6023 
6024       u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
6025 
6026       if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
6027          mtx_unlock(&timeline->mutex);
6028          return VK_TIMEOUT;
6029       }
6030    }
6031 
6032    struct radv_timeline_point *point =
6033       radv_timeline_find_point_at_least_locked(device, timeline, value);
6034    mtx_unlock(&timeline->mutex);
6035    if (!point)
6036       return VK_SUCCESS;
6037 
6038    bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
6039 
6040    mtx_lock(&timeline->mutex);
6041    point->wait_count--;
6042    mtx_unlock(&timeline->mutex);
6043    return success ? VK_SUCCESS : VK_TIMEOUT;
6044 }
6045 
6046 static void
6047 radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
6048                                      struct list_head *processing_list)
6049 {
6050    list_for_each_entry_safe(struct radv_timeline_waiter, waiter, &timeline->waiters, list)
6051    {
6052       if (waiter->value > timeline->highest_submitted)
6053          continue;
6054 
6055       radv_queue_trigger_submission(waiter->submission, 1, processing_list);
6056       list_del(&waiter->list);
6057    }
6058 }
6059 
6060 static void
6061 radv_destroy_semaphore_part(struct radv_device *device, struct radv_semaphore_part *part)
6062 {
6063    switch (part->kind) {
6064    case RADV_SEMAPHORE_NONE:
6065       break;
6066    case RADV_SEMAPHORE_TIMELINE:
6067       radv_destroy_timeline(device, &part->timeline);
6068       break;
6069    case RADV_SEMAPHORE_SYNCOBJ:
6070    case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
6071       device->ws->destroy_syncobj(device->ws, part->syncobj);
6072       break;
6073    }
6074    part->kind = RADV_SEMAPHORE_NONE;
6075 }
6076 
6077 static VkSemaphoreTypeKHR
6078 radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
6079 {
6080    const VkSemaphoreTypeCreateInfo *type_info =
6081       vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
6082 
6083    if (!type_info)
6084       return VK_SEMAPHORE_TYPE_BINARY;
6085 
6086    if (initial_value)
6087       *initial_value = type_info->initialValue;
6088    return type_info->semaphoreType;
6089 }
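/* Illustrative application-side chain this helper parses (not driver
 * code): creating a timeline semaphore with an initial value of 1:
 *
 *    VkSemaphoreTypeCreateInfo type_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
 *       .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
 *       .initialValue = 1,
 *    };
 *    VkSemaphoreCreateInfo create_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *       .pNext = &type_info,
 *    };
 *    vkCreateSemaphore(device, &create_info, NULL, &semaphore);
 */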
6090 
6091 static void
6092 radv_destroy_semaphore(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
6093                        struct radv_semaphore *sem)
6094 {
6095    radv_destroy_semaphore_part(device, &sem->temporary);
6096    radv_destroy_semaphore_part(device, &sem->permanent);
6097    vk_object_base_finish(&sem->base);
6098    vk_free2(&device->vk.alloc, pAllocator, sem);
6099 }
6100 
6101 VkResult
6102 radv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo,
6103                      const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)
6104 {
6105    RADV_FROM_HANDLE(radv_device, device, _device);
6106    uint64_t initial_value = 0;
6107    VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
6108 
6109    struct radv_semaphore *sem =
6110       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6111    if (!sem)
6112       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6113 
6114    vk_object_base_init(&device->vk, &sem->base, VK_OBJECT_TYPE_SEMAPHORE);
6115 
6116    sem->temporary.kind = RADV_SEMAPHORE_NONE;
6117    sem->permanent.kind = RADV_SEMAPHORE_NONE;
6118 
6119    if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
6120        device->physical_device->rad_info.has_timeline_syncobj) {
6121       int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
6122       if (ret) {
6123          radv_destroy_semaphore(device, pAllocator, sem);
6124          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6125       }
6126       device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
6127       sem->permanent.timeline_syncobj.max_point = initial_value;
6128       sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
6129    } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
6130       radv_create_timeline(&sem->permanent.timeline, initial_value);
6131       sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
6132    } else {
6133       int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
6134       if (ret) {
6135          radv_destroy_semaphore(device, pAllocator, sem);
6136          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6137       }
6138       sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
6139    }
6140 
6141    *pSemaphore = radv_semaphore_to_handle(sem);
6142    return VK_SUCCESS;
6143 }
6144 
6145 void
radv_DestroySemaphore(VkDevice _device,VkSemaphore _semaphore,const VkAllocationCallbacks * pAllocator)6146 radv_DestroySemaphore(VkDevice _device, VkSemaphore _semaphore,
6147                       const VkAllocationCallbacks *pAllocator)
6148 {
6149    RADV_FROM_HANDLE(radv_device, device, _device);
6150    RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
6151    if (!_semaphore)
6152       return;
6153 
6154    radv_destroy_semaphore(device, pAllocator, sem);
6155 }
6156 
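/* Query the current payload of a timeline semaphore; a temporary part (from
 * an import) takes precedence over the permanent one.
 */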
VkResult
radv_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
                                         ? &semaphore->temporary
                                         : &semaphore->permanent;

   switch (part->kind) {
   case RADV_SEMAPHORE_TIMELINE: {
      mtx_lock(&part->timeline.mutex);
      radv_timeline_gc_locked(device, &part->timeline);
      *pValue = part->timeline.highest_signaled;
      mtx_unlock(&part->timeline.mutex);
      return VK_SUCCESS;
   }
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
      return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
   }
   case RADV_SEMAPHORE_NONE:
   case RADV_SEMAPHORE_SYNCOBJ:
      unreachable("Invalid semaphore type");
   }
   unreachable("Unhandled semaphore type");
}

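/* Emulated timelines have no single object to block on for a WAIT_ANY of
 * several semaphores, so that case polls each timeline with a zero timeout
 * until one signals or the deadline passes. The wait-all path simply waits
 * on each semaphore in turn with the full absolute timeout.
 */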
static VkResult
radv_wait_timelines(struct radv_device *device, const VkSemaphoreWaitInfo *pWaitInfo,
                    uint64_t abs_timeout)
{
   if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
      for (;;) {
         for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
            RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
            VkResult result =
               radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);

            if (result == VK_SUCCESS)
               return VK_SUCCESS;
         }
         if (radv_get_current_time() > abs_timeout)
            return VK_TIMEOUT;
      }
   }

   for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
      VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
                                           pWaitInfo->pValues[i], abs_timeout);

      if (result != VK_SUCCESS)
         return result;
   }
   return VK_SUCCESS;
}

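/* All semaphores in one wait are expected to share the same backing, so
 * inspecting the first semaphore is enough to choose between the emulated
 * timeline path and the kernel timeline-syncobj path.
 */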
VkResult
radv_WaitSemaphores(VkDevice _device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   uint64_t abs_timeout = radv_get_absolute_timeout(timeout);

   if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind ==
       RADV_SEMAPHORE_TIMELINE)
      return radv_wait_timelines(device, pWaitInfo, abs_timeout);

   if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
      return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "semaphoreCount integer overflow");

   bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
   uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
   if (!handles)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
      handles[i] = semaphore->permanent.syncobj;
   }

   bool success =
      device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
                                        pWaitInfo->semaphoreCount, wait_all, false, abs_timeout);
   free(handles);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

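/* Host-side signal. For emulated timelines this can unblock queued
 * submissions, which must be processed before the application is woken so
 * that any binary semaphores they signal are immediately usable.
 */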
VkResult
radv_SignalSemaphore(VkDevice _device, const VkSemaphoreSignalInfo *pSignalInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);

   struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
                                         ? &semaphore->temporary
                                         : &semaphore->permanent;

   switch (part->kind) {
   case RADV_SEMAPHORE_TIMELINE: {
      mtx_lock(&part->timeline.mutex);
      radv_timeline_gc_locked(device, &part->timeline);
      part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
      part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);

      struct list_head processing_list;
      list_inithead(&processing_list);
      radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
      mtx_unlock(&part->timeline.mutex);

      VkResult result = radv_process_submissions(&processing_list);

      /* This needs to happen after radv_process_submissions, so
       * that any submitted submissions that are now unblocked get
       * processed before we wake the application. This way we
       * ensure that any binary semaphores that are now unblocked
       * are usable by the application. */
      u_cnd_monotonic_broadcast(&device->timeline_cond);

      return result;
   }
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
      part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
      device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
      break;
   }
   case RADV_SEMAPHORE_NONE:
   case RADV_SEMAPHORE_SYNCOBJ:
      unreachable("Invalid semaphore type");
   }
   return VK_SUCCESS;
}

static void
radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_event *event)
{
   if (event->bo)
      device->ws->buffer_destroy(device->ws, event->bo);

   vk_object_base_finish(&event->base);
   vk_free2(&device->vk.alloc, pAllocator, event);
}

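/* An event is backed by a small CPU-visible GTT buffer; the host toggles the
 * value through the persistent map below while the GPU accesses it from
 * command streams.
 */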
VkResult
radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);

   VkResult result = device->ws->buffer_create(
      device->ws, 8, 8, RADEON_DOMAIN_GTT,
      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
      RADV_BO_PRIORITY_FENCE, 0, &event->bo);
   if (result != VK_SUCCESS) {
      radv_destroy_event(device, pAllocator, event);
      return vk_error(device, result);
   }

   event->map = (uint64_t *)device->ws->buffer_map(event->bo);
   if (!event->map) {
      radv_destroy_event(device, pAllocator, event);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *pEvent = radv_event_to_handle(event);

   return VK_SUCCESS;
}

void
radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_event, event, _event);

   if (!event)
      return;

   radv_destroy_event(device, pAllocator, event);
}

VkResult
radv_GetEventStatus(VkDevice _device, VkEvent _event)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_event, event, _event);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (*event->map == 1)
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

VkResult
radv_SetEvent(VkDevice _device, VkEvent _event)
{
   RADV_FROM_HANDLE(radv_event, event, _event);
   *event->map = 1;

   return VK_SUCCESS;
}

VkResult
radv_ResetEvent(VkDevice _device, VkEvent _event)
{
   RADV_FROM_HANDLE(radv_event, event, _event);
   *event->map = 0;

   return VK_SUCCESS;
}

void
radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
                 struct radeon_winsys_bo *bo, uint64_t size,
                 uint64_t offset)
{
   vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);

   buffer->usage = 0;
   buffer->flags = 0;
   buffer->bo = bo;
   buffer->size = size;
   buffer->offset = offset;
}

void
radv_buffer_finish(struct radv_buffer *buffer)
{
   vk_object_base_finish(&buffer->base);
}

static void
radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                    struct radv_buffer *buffer)
{
   if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
      device->ws->buffer_destroy(device->ws, buffer->bo);

   radv_buffer_finish(buffer);
   vk_free2(&device->vk.alloc, pAllocator, buffer);
}

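/* Only sparse buffers get a BO at create time (a virtual BO, so the VA range
 * exists before any memory is bound); regular buffers receive their BO in
 * vkBindBufferMemory().
 */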
VkResult
radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer *buffer;

   if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_buffer_init(buffer, device, NULL, pCreateInfo->size, 0);

   buffer->usage = pCreateInfo->usage;
   buffer->flags = pCreateInfo->flags;

   buffer->shareable =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;

   if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
      enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;
      if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
         flags |= RADEON_FLAG_REPLAYABLE;

      uint64_t replay_address = 0;
      const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
         vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
      if (replay_info && replay_info->opaqueCaptureAddress)
         replay_address = replay_info->opaqueCaptureAddress;

      VkResult result = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
                                                  flags, RADV_BO_PRIORITY_VIRTUAL,
                                                  replay_address, &buffer->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_buffer(device, pAllocator, buffer);
         return vk_error(device, result);
      }
   }

   *pBuffer = radv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void
radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   radv_destroy_buffer(device, pAllocator, buffer);
}

VkDeviceAddress
radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return radv_buffer_get_va(buffer->bo) + buffer->offset;
}

uint64_t
radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;
}

uint64_t
radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
                                         const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
   return radv_buffer_get_va(mem->bo);
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static uint32_t
radv_surface_max_layer_count(struct radv_image_view *iview)
{
   return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
                                               : (iview->base_layer + iview->layer_count);
}

static unsigned
get_dcc_max_uncompressed_block_size(const struct radv_device *device,
                                    const struct radv_image_view *iview)
{
   if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {
      if (iview->image->planes[0].surface.bpe == 1)
         return V_028C78_MAX_BLOCK_SIZE_64B;
      else if (iview->image->planes[0].surface.bpe == 2)
         return V_028C78_MAX_BLOCK_SIZE_128B;
   }

   return V_028C78_MAX_BLOCK_SIZE_256B;
}

static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
{
   if (!device->physical_device->rad_info.has_dedicated_vram) {
      /* amdvlk: [min-compressed-block-size] should be set to 32 for
       * dGPU and 64 for APU because all of our APUs to date use
       * DIMMs which have a request granularity size of 64B while all
       * other chips have a 32B request size.
       */
      return V_028C78_MIN_BLOCK_SIZE_64B;
   }

   return V_028C78_MIN_BLOCK_SIZE_32B;
}

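/* Build the CB_DCC_CONTROL value. The block-size and independent-block
 * fields balance compression efficiency against what the texture unit can
 * read; on GFX9+ most of them are precomputed by ac_surface.
 */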
static uint32_t
radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
{
   unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
   unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
   unsigned max_compressed_block_size;
   unsigned independent_128b_blocks;
   unsigned independent_64b_blocks;

   if (!radv_dcc_enabled(iview->image, iview->base_mip))
      return 0;

   /* For GFX9+ ac_surface computes values for us (except min_compressed
    * and max_uncompressed) */
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      max_compressed_block_size =
         iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
      independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
      independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
   } else {
      independent_128b_blocks = 0;

      if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
         /* If this DCC image is potentially going to be used in texture
          * fetches, we need some special settings.
          */
         independent_64b_blocks = 1;
         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
      } else {
         /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
          * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
          * big as possible for better compression state.
          */
         independent_64b_blocks = 0;
         max_compressed_block_size = max_uncompressed_block_size;
      }
   }

   return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
          S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
          S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
          S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
          S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}

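/* Fill the CB_COLOR* register values for a color attachment: base address,
 * tiling/swizzle mode, slice range, FMASK/CMASK/DCC metadata and the number
 * format fields.
 */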
void
radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
{
   const struct util_format_description *desc;
   unsigned ntype, format, swap, endian;
   unsigned blend_clamp = 0, blend_bypass = 0;
   uint64_t va;
   const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
   const struct radeon_surf *surf = &plane->surface;

   desc = vk_format_description(iview->vk_format);

   memset(cb, 0, sizeof(*cb));

   /* Intensity is implemented as Red, so treat it that way. */
   cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);

   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;

   cb->cb_color_base = va >> 8;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (device->physical_device->rad_info.chip_class >= GFX10) {
         cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
                                 S_028EE0_CMASK_PIPE_ALIGNED(1) |
                                 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
      } else {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (surf->meta_offset)
            meta = surf->u.gfx9.color.dcc;

         cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
                                S_028C74_RB_ALIGNED(meta.rb_aligned) |
                                S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
         cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
      }

      cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
      cb->cb_color_base |= surf->tile_swizzle;
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
      unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

      cb->cb_color_base += level_info->offset_256B;
      if (level_info->mode == RADEON_SURF_MODE_2D)
         cb->cb_color_base |= surf->tile_swizzle;

      pitch_tile_max = level_info->nblk_x / 8 - 1;
      slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
      tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

      cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
      cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
      cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;

      cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

      if (radv_image_has_fmask(iview->image)) {
         if (device->physical_device->rad_info.chip_class >= GFX7)
            cb->cb_color_pitch |=
               S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
      } else {
         /* This must be set for fast clear to work without FMASK. */
         if (device->physical_device->rad_info.chip_class >= GFX7)
            cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
      }
   }

   /* CMASK variables */
   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
   va += surf->cmask_offset;
   cb->cb_color_cmask = va >> 8;

   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
   va += surf->meta_offset;

   if (radv_dcc_enabled(iview->image, iview->base_mip) &&
       device->physical_device->rad_info.chip_class <= GFX8)
      va += plane->surface.u.legacy.color.dcc_level[iview->base_mip].dcc_offset;

   unsigned dcc_tile_swizzle = surf->tile_swizzle;
   dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;

   cb->cb_dcc_base = va >> 8;
   cb->cb_dcc_base |= dcc_tile_swizzle;

   /* GFX10 field has the same base shift as the GFX6 field. */
   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   cb->cb_color_view =
      S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);

   if (iview->image->info.samples > 1) {
      unsigned log_samples = util_logbase2(iview->image->info.samples);

      cb->cb_color_attrib |=
         S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples);
   }

   if (radv_image_has_fmask(iview->image)) {
      va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->fmask_offset;
      cb->cb_color_fmask = va >> 8;
      cb->cb_color_fmask |= surf->fmask_tile_swizzle;
   } else {
      cb->cb_color_fmask = cb->cb_color_base;
   }

   ntype = radv_translate_color_numformat(iview->vk_format, desc,
                                          vk_format_get_first_non_void_channel(iview->vk_format));
   format = radv_translate_colorformat(iview->vk_format);
   assert(format != V_028C70_COLOR_INVALID);

   swap = radv_translate_colorswap(iview->vk_format, false);
   endian = radv_colorformat_endian_swap(format);

   /* blend clamp should be set for all NORM/SRGB types */
   if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
       ntype == V_028C70_NUMBER_SRGB)
      blend_clamp = 1;

   /* set blend bypass according to docs if SINT/UINT or
      8/24 COLOR variants */
   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
       format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
       format == V_028C70_COLOR_X24_8_32_FLOAT) {
      blend_clamp = 0;
      blend_bypass = 1;
   }
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
   cb->cb_color_info =
      S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
      S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
      S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
                          ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
                          format != V_028C70_COLOR_24_8) |
      S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
   if (radv_image_has_fmask(iview->image)) {
      cb->cb_color_info |= S_028C70_COMPRESSION(1);
      if (device->physical_device->rad_info.chip_class == GFX6) {
         unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
      }

      if (radv_image_is_tc_compat_cmask(iview->image)) {
         /* Allow the texture block to read FMASK directly
          * without decompressing it. This bit must be cleared
          * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
          * otherwise the operation doesn't happen.
          */
         cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);

         if (device->physical_device->rad_info.chip_class == GFX8) {
            /* Set CMASK into a tiling format that allows
             * the texture block to read it.
             */
            cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
         }
      }
   }

   if (radv_image_has_cmask(iview->image) &&
       !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
      cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

   if (radv_dcc_enabled(iview->image, iview->base_mip))
      cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

   cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

   /* This must be set for fast clear to work without FMASK. */
   if (!radv_image_has_fmask(iview->image) &&
       device->physical_device->rad_info.chip_class == GFX6) {
      unsigned bankh = util_logbase2(surf->u.legacy.bankh);
      cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   }

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D
                               ? (iview->extent.depth - 1)
                               : (iview->image->info.array_size - 1);
      unsigned width =
         vk_format_get_plane_width(iview->image->vk_format, iview->plane_id, iview->extent.width);
      unsigned height =
         vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);

      if (device->physical_device->rad_info.chip_class >= GFX10) {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);

         cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
                                 S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
                                 S_028EE0_RESOURCE_LEVEL(1);
      } else {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
         cb->cb_color_attrib |=
            S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
      }

      cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
                             S_028C68_MAX_MIP(iview->image->info.levels - 1);
   }
}

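/* Number of Z planes the DB can keep compressed before it must decompress
 * (DECOMPRESS_ON_N_ZPLANES). The value depends on format, sample count and
 * known hardware issues, and is only meaningful for TC-compatible HTILE.
 */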
static unsigned
radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
{
   unsigned max_zplanes = 0;

   assert(radv_image_is_tc_compat_htile(iview->image));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* Default value for 32-bit depth surfaces. */
      max_zplanes = 4;

      if (iview->vk_format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
         max_zplanes = 2;

      /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
      if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
          radv_image_get_iterate256(device, iview->image) &&
          !radv_image_tile_stencil_disabled(device, iview->image) &&
          iview->image->info.samples == 4) {
         max_zplanes = 1;
      }

      max_zplanes = max_zplanes + 1;
   } else {
      if (iview->vk_format == VK_FORMAT_D16_UNORM) {
         /* Do not enable Z plane compression for 16-bit depth
          * surfaces because it isn't supported on GFX8. Only
          * 32-bit depth surfaces are supported by the hardware.
          * This allows us to maintain shader compatibility and to
          * reduce the number of depth decompressions.
          */
         max_zplanes = 1;
      } else {
         if (iview->image->info.samples <= 1)
            max_zplanes = 5;
         else if (iview->image->info.samples <= 4)
            max_zplanes = 3;
         else
            max_zplanes = 2;
      }
   }

   return max_zplanes;
}

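/* Set up a minimal depth surface that only carries HTILE-encoded VRS rates:
 * D16 with stencil disabled, with the HTILE buffer supplied separately from
 * the image.
 */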
void
radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
                            struct radv_ds_buffer_info *ds)
{
   const struct radeon_surf *surf = &image->planes[0].surface;

   assert(image->vk_format == VK_FORMAT_D16_UNORM);
   memset(ds, 0, sizeof(*ds));

   ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);

   ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
                   S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
                   S_028038_ZRANGE_PRECISION(1) |
                   S_028038_TILE_SURFACE_ENABLE(1);
   ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);

   ds->db_depth_size = S_02801C_X_MAX(image->info.width - 1) |
                       S_02801C_Y_MAX(image->info.height - 1);

   ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
   ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
                          S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
}

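/* Fill the DB_* register values for a depth/stencil attachment, covering
 * both the GFX9+ and the legacy GFX6-8 register layouts, including HTILE
 * setup when it is enabled for the selected mip level.
 */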
void
radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
{
   unsigned level = iview->base_mip;
   unsigned format, stencil_format;
   uint64_t va, s_offs, z_offs;
   bool stencil_only = iview->image->vk_format == VK_FORMAT_S8_UINT;
   const struct radv_image_plane *plane = &iview->image->planes[0];
   const struct radeon_surf *surf = &plane->surface;

   assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

   memset(ds, 0, sizeof(*ds));
   if (!device->instance->absolute_depth_bias) {
      switch (iview->image->vk_format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
      case VK_FORMAT_X8_D24_UNORM_PACK32:
         ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
         break;
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D16_UNORM_S8_UINT:
         ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
         break;
      case VK_FORMAT_D32_SFLOAT:
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         ds->pa_su_poly_offset_db_fmt_cntl =
            S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
         break;
      default:
         break;
      }
   }

   format = radv_translate_dbformat(iview->image->vk_format);
   stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      ds->db_depth_view |=
         S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
   }

   ds->db_htile_data_base = 0;
   ds->db_htile_surface = 0;

   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
   s_offs = z_offs = va;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      assert(surf->u.gfx9.surf_offset == 0);
      s_offs += surf->u.gfx9.zs.stencil_offset;

      ds->db_z_info = S_028038_FORMAT(format) |
                      S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
                      S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
                      S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);
      ds->db_stencil_info =
         S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);

      if (device->physical_device->rad_info.chip_class == GFX9) {
         ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
         ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
      }

      ds->db_depth_view |= S_028008_MIPID(level);
      ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
                          S_02801C_Y_MAX(iview->image->info.height - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);

            ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

            if (device->physical_device->rad_info.chip_class >= GFX10) {
               bool iterate256 = radv_image_get_iterate256(device, iview->image);

               ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
               ds->db_z_info |= S_028040_ITERATE_256(iterate256);
               ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
            } else {
               ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
            }
         }

         if (radv_image_tile_stencil_disabled(device, iview->image)) {
            ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
         }

         va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);

         if (device->physical_device->rad_info.chip_class == GFX9) {
            ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
         }

         if (radv_image_has_vrs_htile(device, iview->image)) {
            ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
         }
      }
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

      if (stencil_only)
         level_info = &surf->u.legacy.zs.stencil_level[level];

      z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
      s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;

      ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
      ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
      ds->db_stencil_info = S_028044_FORMAT(stencil_format);

      if (iview->image->info.samples > 1)
         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

      if (device->physical_device->rad_info.chip_class >= GFX7) {
         struct radeon_info *info = &device->physical_device->rad_info;
         unsigned tiling_index = surf->u.legacy.tiling_index[level];
         unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
         unsigned macro_index = surf->u.legacy.macro_tile_index;
         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

         if (stencil_only)
            tile_mode = stencil_tile_mode;

         ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
                              S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
                              S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
                              S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
                              S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
                              S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
      } else {
         unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
         tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
         if (stencil_only)
            ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
      }

      ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
                          S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
      ds->db_depth_slice =
         S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

         if (radv_image_tile_stencil_disabled(device, iview->image)) {
            ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
         }

         va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);

            ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
            ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
         }
      }
   }

   ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
   ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

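/* With VK_KHR_imageless_framebuffer no attachments exist at create time, so
 * the trailing image-view array is only allocated and filled for the
 * non-imageless case.
 */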
VkResult
radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_framebuffer *framebuffer;
   const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
      vk_find_struct_const(pCreateInfo->pNext, FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer);
   if (!imageless_create_info)
      size += sizeof(struct radv_image_view *) * pCreateInfo->attachmentCount;
   framebuffer =
      vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;
   framebuffer->imageless = !!imageless_create_info;

   if (!imageless_create_info) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct radv_image_view *iview = radv_image_view_from_handle(_iview);
         framebuffer->attachments[i] = iview;
      }
   }

   *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void
radv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb,
                        const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

   if (!fb)
      return;
   vk_object_base_finish(&fb->base);
   vk_free2(&device->vk.alloc, pAllocator, fb);
}

static unsigned
radv_tex_wrap(VkSamplerAddressMode address_mode)
{
   switch (address_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return V_008F30_SQ_TEX_WRAP;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return V_008F30_SQ_TEX_MIRROR;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return V_008F30_SQ_TEX_CLAMP_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
   default:
      unreachable("illegal tex wrap mode");
      break;
   }
}

static unsigned
radv_tex_compare(VkCompareOp op)
{
   switch (op) {
   case VK_COMPARE_OP_NEVER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   case VK_COMPARE_OP_LESS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
   case VK_COMPARE_OP_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
   case VK_COMPARE_OP_LESS_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
   case VK_COMPARE_OP_GREATER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
   case VK_COMPARE_OP_NOT_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
   case VK_COMPARE_OP_ALWAYS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
   default:
      unreachable("illegal compare mode");
      break;
   }
}

static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
                            : V_008F38_SQ_TEX_XY_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
                            : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
   case VK_FILTER_CUBIC_IMG:
   default:
      fprintf(stderr, "illegal texture filter");
      return 0;
   }
}

static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
   switch (mode) {
   case VK_SAMPLER_MIPMAP_MODE_NEAREST:
      return V_008F38_SQ_TEX_Z_FILTER_POINT;
   case VK_SAMPLER_MIPMAP_MODE_LINEAR:
      return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
   default:
      return V_008F38_SQ_TEX_Z_FILTER_NONE;
   }
}

static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
   switch (bcolor) {
   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
      return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
   default:
      break;
   }
   return 0;
}

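/* Encode max anisotropy as the log2 of the ratio the hardware expects:
 * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x -> 4.
 */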
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
   if (filter < 2)
      return 0;
   if (filter < 4)
      return 1;
   if (filter < 8)
      return 2;
   if (filter < 16)
      return 3;
   return 4;
}

static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)
{
   switch (mode) {
   case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
   case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_MIN;
   case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_MAX;
   default:
      break;
   }
   return 0;
}

static uint32_t
radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
{
   if (device->force_aniso >= 0)
      return device->force_aniso;

   if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
      return (uint32_t)pCreateInfo->maxAnisotropy;

   return 0;
}

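/* Convert a float to fixed point with frac_bits fractional bits,
 * e.g. S_FIXED(1.5, 8) == 384. Callers clamp the input range themselves.
 */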
static inline int
S_FIXED(float value, unsigned frac_bits)
{
   return value * (1 << frac_bits);
}

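/* Claim a free slot in the per-device custom border color table and upload
 * the color to the GPU-visible buffer. Returns RADV_BORDER_COLOR_COUNT when
 * the table is full.
 */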
static uint32_t
radv_register_border_color(struct radv_device *device, VkClearColorValue value)
{
   uint32_t slot;

   mtx_lock(&device->border_color_data.mutex);

   for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
      if (!device->border_color_data.used[slot]) {
         /* Copy to the GPU wrt endian-ness. */
         util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
                                 sizeof(VkClearColorValue));

         device->border_color_data.used[slot] = true;
         break;
      }
   }

   mtx_unlock(&device->border_color_data.mutex);

   return slot;
}

static void
radv_unregister_border_color(struct radv_device *device, uint32_t slot)
{
   mtx_lock(&device->border_color_data.mutex);

   device->border_color_data.used[slot] = false;

   mtx_unlock(&device->border_color_data.mutex);
}

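/* Translate a VkSamplerCreateInfo into the four hardware sampler dwords,
 * including a custom border color slot when one is requested.
 */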
7291 static void
radv_init_sampler(struct radv_device * device,struct radv_sampler * sampler,const VkSamplerCreateInfo * pCreateInfo)7292 radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
7293                   const VkSamplerCreateInfo *pCreateInfo)
7294 {
7295    uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
7296    uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
7297    bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
7298                       device->physical_device->rad_info.chip_class == GFX9;
7299    unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7300    unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7301    bool trunc_coord =
7302       pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
7303    bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7304                             pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7305                             pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
7306    VkBorderColor border_color =
7307       uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7308    uint32_t border_color_ptr;
7309 
7310    const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
7311       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
7312    if (sampler_reduction)
7313       filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
7314 
7315    if (pCreateInfo->compareEnable)
7316       depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
7317 
7318    sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
7319 
7320    if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
7321        border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
7322       const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
7323          vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
7324 
7325       assert(custom_border_color);
7326 
7327       sampler->border_color_slot =
7328          radv_register_border_color(device, custom_border_color->customBorderColor);
7329 
7330       /* Did we fail to find a slot? */
7331       if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
7332          fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
7333          border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7334       }
7335    }
7336 
7337    /* If we don't have a custom color, set the ptr to 0 */
7338    border_color_ptr =
7339       sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
7340 
   sampler->state[0] =
      (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
       S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
       S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
       S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
       S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
       S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
       S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(compat_mode) |
       S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
   sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
                        S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
                        S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
   sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
                        S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
                        S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
                        S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
                        S_008F38_MIP_POINT_PRECLAMP(0));
   sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
                        S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
   } else {
      sampler->state[2] |=
         S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
         S_008F38_FILTER_PREC_FIX(1) |
         S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
   }
}

VkResult
radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_sampler *sampler;

   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);

   radv_init_sampler(device, sampler, pCreateInfo);

   sampler->ycbcr_sampler =
      ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
                       : NULL;
   *pSampler = radv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}
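
/* For reference, a minimal sketch of how an application would exercise the
 * custom border color path above (hypothetical application code, not part of
 * this driver; assumes VK_EXT_custom_border_color is enabled on the device):
 *
 *    VkSamplerCustomBorderColorCreateInfoEXT custom = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
 *       .customBorderColor = {.float32 = {1.0f, 0.5f, 0.25f, 1.0f}},
 *       .format = VK_FORMAT_R8G8B8A8_UNORM,
 *    };
 *    VkSamplerCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *       .pNext = &custom,
 *       .magFilter = VK_FILTER_LINEAR,
 *       .minFilter = VK_FILTER_LINEAR,
 *       .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *       .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *       .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
 *       .borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT,
 *    };
 *    VkSampler sampler;
 *    vkCreateSampler(device, &info, NULL, &sampler);
 *
 * radv_init_sampler() then registers the custom color in the per-device
 * border color buffer and stores the slot index in the sampler descriptor.
 */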

void
radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
      radv_unregister_border_color(device, sampler->border_color_slot);

   vk_object_base_finish(&sampler->base);
   vk_free2(&device->vk.alloc, pAllocator, sampler);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it is
    *         linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr(),
    *          which is why we advertise version 4 below.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}

VkResult
radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   bool ret = radv_get_memory_fd(device, memory, pFD);
   if (!ret)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   return VK_SUCCESS;
}

static uint32_t
radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
                                        enum radeon_bo_domain domains, enum radeon_bo_flag flags,
                                        enum radeon_bo_flag ignore_flags)
{
   /* Don't count GTT/CPU as relevant:
    *
    * - We're not fully consistent between the two.
    * - Sometimes VRAM gets VRAM|GTT.
    */
   const enum radeon_bo_domain relevant_domains =
      RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
   uint32_t bits = 0;
   for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
      if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
         continue;

      if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
         continue;

      bits |= 1u << i;
   }

   return bits;
}

static uint32_t
radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
                                enum radeon_bo_flag flags)
{
   enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
   uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);

   if (!bits) {
      ignore_flags |= RADEON_FLAG_GTT_WC;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   if (!bits) {
      ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   return bits;
}
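
/* Note the progressive relaxation above: the first attempt requires the
 * NO_CPU_ACCESS and GTT_WC flags to match exactly, the second ignores GTT_WC
 * as well, and the last ignores both. A small worked example (hypothetical
 * memory types; the real set depends on the GPU):
 *
 *    type 0: VRAM, NO_CPU_ACCESS
 *    type 1: VRAM, GTT_WC
 *    type 2: GTT
 *
 * A BO living in VRAM with neither flag set matches no type in the first
 * attempt (type 0 and type 1 each differ in one required flag), but once
 * GTT_WC is ignored it matches type 1, so the function returns the bit for
 * type 1 instead of failing outright.
 */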

VkResult
radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
                              int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
      enum radeon_bo_domain domains;
      enum radeon_bo_flag flags;
      if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
         return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

      pMemoryFdProperties->memoryTypeBits =
         radv_compute_valid_memory_types(device->physical_device, domains, flags);
      return VK_SUCCESS;
   }
   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

static VkResult
radv_import_opaque_fd(struct radv_device *device, int fd, uint32_t *syncobj)
{
   uint32_t syncobj_handle = 0;
   int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
   if (ret != 0)
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   if (*syncobj)
      device->ws->destroy_syncobj(device->ws, *syncobj);

   *syncobj = syncobj_handle;
   close(fd);

   return VK_SUCCESS;
}
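
/* On a successful opaque-FD import the implementation takes ownership of the
 * file descriptor, which is why radv_import_opaque_fd() closes it and
 * destroys any previously imported payload first; the caller must not use
 * the fd afterwards.
 */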

static VkResult
radv_import_sync_fd(struct radv_device *device, int fd, uint32_t *syncobj)
{
   /* If we need to create a syncobj, create it locally so that on failure we
    * don't leave a syncobj in an undetermined state in the semaphore/fence. */
   uint32_t syncobj_handle = *syncobj;
   if (!syncobj_handle) {
      bool create_signaled = fd == -1;

      int ret = device->ws->create_syncobj(device->ws, create_signaled, &syncobj_handle);
      if (ret)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   } else {
      if (fd == -1)
         device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
   }

   if (fd != -1) {
      int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
      if (ret)
         return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      close(fd);
   }

   *syncobj = syncobj_handle;

   return VK_SUCCESS;
}
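
/* The fd == -1 special case above implements the sync-FD rule from the spec:
 * importing the value -1 is equivalent to importing a sync file that has
 * already signaled, so we either create the syncobj pre-signaled or signal
 * the existing one instead of importing anything.
 */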

VkResult
radv_ImportSemaphoreFdKHR(VkDevice _device,
                          const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
   VkResult result;
   struct radv_semaphore_part *dst = NULL;
   bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;

   if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
      assert(!timeline);
      dst = &sem->temporary;
   } else {
      dst = &sem->permanent;
   }

   uint32_t syncobj =
      (dst->kind == RADV_SEMAPHORE_SYNCOBJ || dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         ? dst->syncobj
         : 0;

   switch (pImportSemaphoreFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      assert(!timeline);
      result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
      break;
   default:
      unreachable("Unhandled semaphore handle type");
   }

   if (result == VK_SUCCESS) {
      dst->syncobj = syncobj;
      dst->kind = RADV_SEMAPHORE_SYNCOBJ;
      if (timeline) {
         dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
         dst->timeline_syncobj.max_point = 0;
      }
   }

   return result;
}
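
/* A sketch of the application-side flow this implements (hypothetical code,
 * not part of this driver): a temporary import replaces the payload only
 * until the next wait on the semaphore completes, a permanent one replaces
 * it for good.
 *
 *    VkImportSemaphoreFdInfoKHR import = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
 *       .semaphore = sem,
 *       .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
 *       .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
 *       .fd = fd,
 *    };
 *    VkResult res = vkImportSemaphoreFdKHR(device, &import);
 *
 * The sem->temporary vs. sem->permanent split above is what makes that
 * distinction work.
 */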

VkResult
radv_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
   int ret;
   uint32_t syncobj_handle;

   if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
      assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
             sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
      syncobj_handle = sem->temporary.syncobj;
   } else {
      assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
             sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
      syncobj_handle = sem->permanent.syncobj;
   }

   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);

      if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
         radv_destroy_semaphore_part(device, &sem->temporary);
      } else {
         device->ws->reset_syncobj(device->ws, syncobj_handle);
      }
      break;
   default:
      unreachable("Unhandled semaphore handle type");
   }

   return VK_SUCCESS;
}
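
/* Exporting a sync FD has the same effect on the semaphore payload as a wait
 * operation: the semaphore must be left unsignaled afterwards. That is why
 * the temporary part is destroyed (reverting to the permanent payload) or
 * the syncobj is reset above.
 */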

void
radv_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

   if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
       pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   } else if (pExternalSemaphoreInfo->handleType ==
                 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   }
}

VkResult
radv_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
   struct radv_fence_part *dst = NULL;
   VkResult result;

   if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
      dst = &fence->temporary;
   } else {
      dst = &fence->permanent;
   }

   uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;

   switch (pImportFenceFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
      break;
   default:
      unreachable("Unhandled fence handle type");
   }

   if (result == VK_SUCCESS) {
      dst->syncobj = syncobj;
      dst->kind = RADV_FENCE_SYNCOBJ;
   }

   return result;
}

VkResult
radv_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
   int ret;

   struct radv_fence_part *part =
      fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      ret = device->ws->export_syncobj_to_sync_file(device->ws, part->syncobj, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);

      if (part == &fence->temporary) {
         radv_destroy_fence_part(device, part);
      } else {
         device->ws->reset_syncobj(device->ws, part->syncobj);
      }
      break;
   default:
      unreachable("Unhandled fence handle type");
   }

   return VK_SUCCESS;
}

void
radv_GetPhysicalDeviceExternalFenceProperties(
   VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
   VkExternalFenceProperties *pExternalFenceProperties)
{
   if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
       pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
      pExternalFenceProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->compatibleHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->externalFenceFeatures =
         VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalFenceProperties->exportFromImportedHandleTypes = 0;
      pExternalFenceProperties->compatibleHandleTypes = 0;
      pExternalFenceProperties->externalFenceFeatures = 0;
   }
}

void
radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
                                      uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
                                      VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
   assert(localDeviceIndex == remoteDeviceIndex);

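   /* RADV only exposes single-device groups, so local and remote are always
    * the same device (hence the assert above) and every peer memory feature
    * can be advertised trivially. */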
   *pPeerMemoryFeatures =
      VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
      VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult
radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
                                                  uint32_t *pTimeDomainCount,
                                                  VkTimeDomainEXT *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
      {
         *i = radv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

#ifndef _WIN32
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult
radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
                                const VkCalibratedTimestampInfoEXT *pTimestampInfos,
                                uint64_t *pTimestamps, uint64_t *pMaxDeviation)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
         uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                                 s                 e
    *                       w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw                 -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                                 g
    *             0         1         2         3
    *   GPU       -----_____-----_____-----_____-----_____
    *
    *                                                m
    *                                       x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval                     <----------------->
    *   Deviation           <-------------------------->
    *
    *       s  = read(raw)       2
    *       g  = read(GPU)       1
    *       m  = read(monotonic) 2
    *       e  = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */
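
   /* A rough numeric example (hypothetical numbers, assuming the crystal
    * frequency is reported in kHz): with a 100 MHz crystal, the device tick
    * is DIV_ROUND_UP(1000000, 100000) = 10 ns, and a sampling interval of,
    * say, 480 ns yields
    *
    *    *pMaxDeviation = (480 + 1) + 10 = 491 ns.
    */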

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
#endif

void
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
                                               VkSampleCountFlagBits samples,
                                               VkMultisamplePropertiesEXT *pMultisampleProperties)
{
   VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
                                             VK_SAMPLE_COUNT_8_BIT;

   if (samples & supported_samples) {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
   } else {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
   }
}

VkResult
radv_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
                          pFragmentShadingRateCount);

#define append_rate(w, h, s)                                                                       \
   {                                                                                               \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                                              \
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR,                     \
         .sampleCounts = s,                                                                        \
         .fragmentSize = {.width = w, .height = h},                                                \
      };                                                                                           \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;         \
   }

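   /* Advertise the supported rates from the largest fragment size (2x2) down
    * to 1x1. The 1x1 rate is reported as supporting every sample count (~0),
    * while the coarser rates are limited to the sample counts the hardware
    * handles with VRS enabled (1, 2, 4 and 8 samples here).
    */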
   for (uint32_t x = 2; x >= 1; x--) {
      for (uint32_t y = 2; y >= 1; y--) {
         VkSampleCountFlagBits samples;

         if (x == 1 && y == 1) {
            samples = ~0;
         } else {
            samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
                      VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
         }

         append_rate(x, y, samples);
      }
   }
#undef append_rate

   return vk_outarray_status(&out);
}