/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "common/intel_aux_map.h"
#include "common/intel_defines.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen7_pack.h"

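/* Driver tunables exposed through the drirc configuration system. */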
static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
   DRI_CONF_SECTION_END
};

/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH    4096

/* Render engine timestamp register */
#define TIMESTAMP 0x2358

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

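/* Android advertises at most Vulkan 1.1 here; all other platforms advertise
 * 1.2.  VK_HEADER_VERSION supplies the patch level.
 */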
#ifdef ANDROID
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult anv_EnumerateInstanceVersion(
    uint32_t*                                   pApiVersion)
{
    *pApiVersion = ANV_API_VERSION;
    return VK_SUCCESS;
}

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation                = true,
   .KHR_external_fence_capabilities          = true,
   .KHR_external_memory_capabilities         = true,
   .KHR_external_semaphore_capabilities      = true,
   .KHR_get_physical_device_properties2      = true,
   .EXT_debug_report                         = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2            = true,
   .KHR_surface                              = true,
   .KHR_surface_protected_capabilities       = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                      = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                          = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                         = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display                 = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                              = true,
   .KHR_get_display_properties2              = true,
   .EXT_direct_mode_display                  = true,
   .EXT_display_surface_counter              = true,
   .EXT_acquire_drm_display                  = true,
#endif
};

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = device->info.ver >= 8,
      .KHR_16bit_storage                     = device->info.ver >= 8,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = device->has_a64_buffer_access,
      .KHR_copy_commands2                    = true,
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_external_fence                    = device->has_syncobj_wait,
      .KHR_external_fence_fd                 = device->has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_fragment_shading_rate             = device->info.ver >= 11,
      .KHR_get_memory_requirements2          = true,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_multiview                         = true,
      .KHR_performance_query =
         device->use_softpin && device->perf &&
         (device->perf->i915_perf_version >= 3 ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      .KHR_push_descriptor                   = true,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_atomic_int64               = device->info.ver >= 9 &&
                                               device->use_softpin,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_float16_int8               = device->info.ver >= 8,
      .KHR_shader_float_controls             = device->info.ver >= 8,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_subgroup_extended_types    = device->info.ver >= 8,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_buffer_device_address             = device->has_a64_buffer_access,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = device->info.verx10 >= 75,
      .EXT_conservative_rasterization        = device->info.ver >= 9,
      .EXT_custom_border_color               = device->info.ver >= 8,
      .EXT_depth_clip_enable                 = true,
      .EXT_descriptor_indexing               = device->has_a64_buffer_access &&
                                               device->has_bindless_images,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_fragment_shader_interlock         = device->info.ver >= 9,
      .EXT_global_priority                   = device->has_context_priority,
      .EXT_host_query_reset                  = true,
      .EXT_image_robustness                  = true,
      .EXT_image_drm_format_modifier         = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_line_rasterization                = true,
      .EXT_memory_budget                     = device->sys.available,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_post_depth_coverage               = device->info.ver >= 9,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_sampler_filter_minmax             = device->info.ver >= 9,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_atomic_float2              = device->info.ver >= 9,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_stencil_export             = device->info.ver >= 9,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
      .EXT_texel_buffer_alignment            = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_ycbcr_image_arrays                = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      .INTEL_performance_query               = device->perf &&
                                               device->perf->i915_perf_version >= 3,
      .INTEL_shader_integer_functions2       = device->info.ver >= 8,
      .EXT_multi_draw                        = true,
      .NV_compute_shader_derivatives         = true,
   };
}

static uint64_t
anv_compute_sys_heap_size(struct anv_physical_device *device,
                          uint64_t total_ram)
{
   /* We don't want to burn too much RAM with the GPU.  If the user has
    * 4 GiB or less, we use at most half.  If they have more than 4 GiB,
    * we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   /* We also want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);

   if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail.  Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
                "no support for 48-bit addresses",
                __FILE__, __LINE__);
      available_ram = 2ull << 30;
   }

   return available_ram;
}

static VkResult MUST_CHECK
anv_gather_meminfo(struct anv_physical_device *device, int fd, bool update)
{
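   /* Scratch space for a fake single-region reply, used below if the kernel
    * does not support the memory-regions query.
    */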
   char sys_mem_regions[sizeof(struct drm_i915_query_memory_regions) +
                        sizeof(struct drm_i915_memory_region_info)];

   struct drm_i915_query_memory_regions *mem_regions =
      intel_i915_query_alloc(fd, DRM_I915_QUERY_MEMORY_REGIONS);
   if (mem_regions == NULL) {
      if (device->info.has_local_mem) {
         return vk_errorf(device, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "failed to query memory regions: %m");
      }

      uint64_t total_phys;
      if (!os_get_total_physical_memory(&total_phys)) {
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "failed to get total physical memory: %m");
      }

      uint64_t available;
      if (!os_get_available_system_memory(&available))
         available = 0; /* Silently disable VK_EXT_memory_budget */

      /* The kernel query failed.  Fake it using OS memory queries.  This
       * should be roughly the same for integrated GPUs.
       */
      mem_regions = (void *)sys_mem_regions;
      mem_regions->num_regions = 1;
      mem_regions->regions[0] = (struct drm_i915_memory_region_info) {
         .region.memory_class = I915_MEMORY_CLASS_SYSTEM,
         .probed_size = total_phys,
         .unallocated_size = available,
      };
   }

   for (int i = 0; i < mem_regions->num_regions; i++) {
      struct drm_i915_memory_region_info *info = &mem_regions->regions[i];

      struct anv_memregion *region;
      switch (info->region.memory_class) {
      case I915_MEMORY_CLASS_SYSTEM:
         region = &device->sys;
         break;
      case I915_MEMORY_CLASS_DEVICE:
         region = &device->vram;
         break;
      default:
         /* We don't know what kind of memory this is */
         continue;
      }

      uint64_t size = info->probed_size;
      if (info->region.memory_class == I915_MEMORY_CLASS_SYSTEM)
         size = anv_compute_sys_heap_size(device, size);

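      /* Clamp the reported available memory to the heap size we advertise
       * so the VK_EXT_memory_budget numbers never exceed the heap.
       */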
      uint64_t available = MIN2(size, info->unallocated_size);

      if (update) {
         assert(region->region.memory_class == info->region.memory_class);
         assert(region->region.memory_instance == info->region.memory_instance);
         assert(region->size == size);
      } else {
         region->region = info->region;
         region->size = size;
      }
      region->available = available;
   }

   if (mem_regions != (void *)sys_mem_regions)
      free(mem_regions);

   return VK_SUCCESS;
}

static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   return anv_gather_meminfo(device, fd, false);
}

static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   ASSERTED VkResult result = anv_gather_meminfo(device, fd, true);
   assert(result == VK_SUCCESS);
}


static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
                                 &device->gtt_size) == -1) {
      /* If, for whatever reason, we can't actually get the GTT size from the
       * kernel (too old?), fall back to the aperture size.
       */
      anv_perf_warn(VK_LOG_NO_OBJS(&device->instance->vk),
                    "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");

      if (intel_get_aperture_size(fd, &device->gtt_size) == -1) {
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "failed to get aperture size: %m");
      }
   }

   /* We only allow 48-bit addresses with softpin because knowing the actual
    * address is required for the vertex cache flush workaround.
    */
   device->supports_48bit_addresses = (device->info.ver >= 8) &&
                                      device->gtt_size > (4ULL << 30 /* GiB */);

   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (device->vram.size > 0) {
      /* With local memory support we create two heaps: the first with the
       * local memory size and the second with the system memory size.
       */
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->vram.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };

      device->memory.type_count = 3;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 1,
      };
      device->memory.types[2] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   } else if (device->info.has_llc) {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };

      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       */
      device->memory.type_count = 1;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };

      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC.  Thus we offer two memory
       * types: one that is cached but not coherent, and one that is
       * coherent but uncached (write-combined).
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   }

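   /* If any memory type is host-visible but not host-coherent, the driver
    * itself has to flush/invalidate mapped ranges with clflush.
    */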
   device->memory.need_clflush = false;
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
         device->memory.need_clflush = true;
   }

   return VK_SUCCESS;
}

static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short.  It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid.  It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device->info.chipset_id,
                     sizeof(device->info.chipset_id));
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
                     sizeof(device->has_a64_buffer_access));
   _mesa_sha1_update(&sha1_ctx, &device->has_bindless_images,
                     sizeof(device->has_bindless_images));
   _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
                     sizeof(device->has_bindless_samplers));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->isl_dev, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[10];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
                               device->info.chipset_id);
   assert(len == sizeof(renderer) - 2);

   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#else
   device->disk_cache = NULL;
#endif
}

static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->disk_cache)
      disk_cache_destroy(device->disk_cache);
#else
   assert(device->disk_cache == NULL);
#endif
}

/* The ANV_QUEUE_OVERRIDE environment variable is a comma-separated list of
 * queue overrides.
 *
 * To override the number of queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
{
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   char *env = getenv("ANV_QUEUE_OVERRIDE");

   if (env == NULL)
      return;

   env = strdup(env);
   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);
   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
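   /* The Vulkan spec requires that, if any queue family advertises graphics,
    * at least one family advertises both graphics and compute, so warn when
    * an override produces graphics-only queues without a gc family.
    */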
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
}

static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;

   if (pdevice->engine_info) {
      int gc_count =
         anv_gem_count_engines(pdevice->engine_info, I915_ENGINE_CLASS_RENDER);
      int g_count = 0;
      int c_count = 0;

      anv_override_engine_counts(&gc_count, &g_count, &c_count);

      if (gc_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = gc_count,
            .engine_class = I915_ENGINE_CLASS_RENDER,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = g_count,
            .engine_class = I915_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = c_count,
            .engine_class = I915_ENGINE_CLASS_RENDER,
         };
      }
      /* When other families are added, bump this static assert as a reminder
       * to also increase the ANV_MAX_QUEUE_FAMILIES value.
       */
      STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT,
         .queueCount = 1,
         .engine_class = I915_ENGINE_CLASS_RENDER,
      };
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}

static VkResult
anv_physical_device_try_create(struct anv_instance *instance,
                               drmDevicePtr drm_device,
                               struct anv_physical_device **device_out)
{
   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;

   brw_process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (errno == ENOMEM) {
         return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "Unable to open device %s: out of memory", path);
      }
      return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Unable to open device %s: %m", path);
   }

   struct intel_device_info devinfo;
   if (!intel_get_device_info_from_fd(fd, &devinfo)) {
      result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail_fd;
   }

   bool is_alpha = true;
   if (devinfo.is_haswell) {
      mesa_logw("Haswell Vulkan support is incomplete");
   } else if (devinfo.ver == 7 && !devinfo.is_baytrail) {
      mesa_logw("Ivy Bridge Vulkan support is incomplete");
   } else if (devinfo.ver == 7 && devinfo.is_baytrail) {
      mesa_logw("Bay Trail Vulkan support is incomplete");
   } else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
      /* Gfx8-12 fully supported */
      is_alpha = false;
   } else {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan not yet supported on %s", devinfo.name);
      goto fail_fd;
   }

   struct anv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (device == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_alloc;
   }
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);

   device->info = devinfo;
   device->is_alpha = is_alpha;

   device->pci_info.domain = drm_device->businfo.pci->domain;
   device->pci_info.bus = drm_device->businfo.pci->bus;
   device->pci_info.device = drm_device->businfo.pci->dev;
   device->pci_info.function = drm_device->businfo.pci->func;

   device->cmd_parser_version = -1;
   if (device->info.ver == 7) {
      device->cmd_parser_version =
         anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
      if (device->cmd_parser_version == -1) {
         result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to get command parser version");
         goto fail_base;
      }
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing gem wait");
      goto fail_base;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing execbuf2");
      goto fail_base;
   }

   if (!device->info.has_llc &&
       anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing wc mmap");
      goto fail_base;
   }

   if (device->info.ver >= 8 && !device->info.is_cherryview &&
       !anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing softpin");
      goto fail_base;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing syncobj support");
      goto fail_base;
   }

   device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
   device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
   device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
   device->has_syncobj_wait = anv_gem_supports_syncobj_wait(fd);
   device->has_syncobj_wait_available =
      anv_gem_get_drm_cap(fd, DRM_CAP_SYNCOBJ_TIMELINE) != 0;

   device->has_context_priority = anv_gem_has_context_priority(fd);

   /* Initialize memory regions struct to 0. */
   memset(&device->vram, 0, sizeof(device->vram));
   memset(&device->sys, 0, sizeof(device->sys));

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail_base;

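   /* The softpin and 48-bit address checks differ (Cherry View vs. GTT
    * size), but they are expected to agree in practice; the assert below
    * documents that.
    */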
   device->use_softpin = device->info.ver >= 8 &&
                         !device->info.is_cherryview;
   assert(device->use_softpin == device->supports_48bit_addresses);

   device->has_context_isolation =
      anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);

   device->has_exec_timeline =
      anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES);
   if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
      device->has_exec_timeline = false;

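   /* Thread-based submission relies on timeline execbuf fences together
    * with the ability to wait on syncobj points that have not yet
    * materialized.
    */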
   device->has_thread_submit =
      device->has_syncobj_wait_available && device->has_exec_timeline;

   device->always_use_bindless =
      env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);

   device->use_call_secondary =
      device->use_softpin &&
      !env_var_as_boolean("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);

   /* We first got the A64 messages on Broadwell and we can only use them if
    * we can pass addresses directly into the shader, which requires softpin.
    */
   device->has_a64_buffer_access = device->info.ver >= 8 &&
                                   device->use_softpin;

   /* We first get bindless image access on Skylake.
    */
   device->has_bindless_images = device->info.ver >= 9;

   /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
    * because it's just a matter of setting the sampler address in the sample
    * message header.  However, we've not bothered to wire it up for vec4 so
    * we leave it disabled on gfx7.
    */
   device->has_bindless_samplers = device->info.ver >= 8;

   device->has_implicit_ccs = device->info.has_aux_map;

   /* Check if we can read the GPU timestamp register from the CPU */
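   /* I915_REG_READ_8B_WA asks the kernel to apply its 8-byte read
    * workaround, which this register needs on some platforms.
    */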
   uint64_t u64_ignore;
   device->has_reg_timestamp = anv_gem_reg_read(fd, TIMESTAMP | I915_REG_READ_8B_WA,
                                                &u64_ignore) == 0;

   device->always_flush_cache = INTEL_DEBUG(DEBUG_SYNC) ||
      driQueryOptionb(&instance->dri_options, "always_flush_cache");

   device->has_mmap_offset =
      anv_gem_get_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;

   device->has_userptr_probe =
      anv_gem_get_param(fd, I915_PARAM_HAS_USERPTR_PROBE);

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_base;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->supports_pull_constants = false;
   device->compiler->constant_buffer_0_is_relative =
      device->info.ver < 8 || !device->has_context_isolation;
   device->compiler->supports_shader_constants = true;
   device->compiler->compact_params = false;
   device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;

   /* Broadwell PRM says:
    *
    *   "Before Gfx8, there was a historical configuration control field to
    *    swizzle address bit[6] for in X/Y tiling modes. This was set in three
    *    different places: TILECTL[1:0], ARB_MODE[5:4], and
    *    DISP_ARB_CTL[14:13].
    *
    *    For Gfx8 and subsequent generations, the swizzle fields are all
    *    reserved, and the CPU's memory controller performs all address
    *    swizzling modifications."
    */
   bool swizzled =
      device->info.ver < 8 && anv_gem_get_bit6_swizzle(fd, I915_TILING_X);

   isl_device_init(&device->isl_dev, &device->info, swizzled);

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail_compiler;

   anv_physical_device_init_disk_cache(device);

   if (instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         /* prod the device with a GETPARAM call which will fail if
          * we don't have permission to even render on this device
          */
         if (anv_gem_get_param(master_fd, I915_PARAM_CHIPSET_ID) == 0) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
   device->master_fd = master_fd;

   device->engine_info = anv_gem_get_engine_info(fd);
   anv_physical_device_init_queue_families(device);

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS)
      goto fail_engine_info;

   anv_physical_device_init_perf(device, fd);

   anv_measure_device_init(device);

   get_device_extensions(device, &device->vk.supported_extensions);

   device->local_fd = fd;

   anv_genX(&device->info, init_physical_device_state)(device);

   *device_out = device;

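   /* Record the primary and render node major/minor numbers so they can be
    * reported through VK_EXT_physical_device_drm.
    */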
   struct stat st;

   if (stat(primary_path, &st) == 0) {
      device->has_master = true;
      device->master_major = major(st.st_rdev);
      device->master_minor = minor(st.st_rdev);
   } else {
      device->has_master = false;
      device->master_major = 0;
      device->master_minor = 0;
   }

   if (stat(path, &st) == 0) {
      device->has_local = true;
      device->local_major = major(st.st_rdev);
      device->local_minor = minor(st.st_rdev);
   } else {
      device->has_local = false;
      device->local_major = 0;
      device->local_minor = 0;
   }

   return VK_SUCCESS;

fail_engine_info:
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
fail_compiler:
   ralloc_free(device->compiler);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
anv_physical_device_destroy(struct anv_physical_device *device)
{
   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   ralloc_free(device->perf);
   close(device->local_fd);
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

VkResult anv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}

static void
anv_init_dri_options(struct anv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
                      ARRAY_SIZE(anv_dri_options));
   driParseConfigFiles(&instance->dri_options,
                       &instance->available_dri_options, 0, "anv", NULL, NULL,
                       instance->vk.app_info.app_name,
                       instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name,
                       instance->vk.app_info.engine_version);
}

VkResult anv_CreateInstance(
    const VkInstanceCreateInfo*                 pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkInstance*                                 pInstance)
{
   struct anv_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator, sizeof(*instance), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk, &instance_extensions,
                             &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   instance->pipeline_cache_enabled =
      env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   anv_init_dri_options(instance);

   *pInstance = anv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void anv_DestroyInstance(
    VkInstance                                  _instance,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct anv_physical_device, pdevice,
                            &instance->physical_devices, link)
      anv_physical_device_destroy(pdevice);

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
anv_enumerate_physical_devices(struct anv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices;

   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
   if (max_devices < 1)
      return VK_SUCCESS;

   VkResult result = VK_SUCCESS;
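   /* Only consider PCI devices with the Intel vendor ID (0x8086) that
    * expose a render node.
    */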
1171    for (unsigned i = 0; i < (unsigned)max_devices; i++) {
1172       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
1173           devices[i]->bustype == DRM_BUS_PCI &&
1174           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
1175 
1176          struct anv_physical_device *pdevice;
1177          result = anv_physical_device_try_create(instance, devices[i],
1178                                                  &pdevice);
1179          /* Incompatible DRM device, skip. */
1180          if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
1181             result = VK_SUCCESS;
1182             continue;
1183          }
1184 
1185          /* Error creating the physical device, report the error. */
1186          if (result != VK_SUCCESS)
1187             break;
1188 
1189          list_addtail(&pdevice->link, &instance->physical_devices);
1190       }
1191    }
1192    drmFreeDevices(devices, max_devices);
1193 
1194    /* If we successfully enumerated any devices, call it success */
1195    return result;
1196 }
1197 
anv_EnumeratePhysicalDevices(VkInstance _instance,uint32_t * pPhysicalDeviceCount,VkPhysicalDevice * pPhysicalDevices)1198 VkResult anv_EnumeratePhysicalDevices(
1199     VkInstance                                  _instance,
1200     uint32_t*                                   pPhysicalDeviceCount,
1201     VkPhysicalDevice*                           pPhysicalDevices)
1202 {
1203    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1204    VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
1205 
1206    VkResult result = anv_enumerate_physical_devices(instance);
1207    if (result != VK_SUCCESS)
1208       return result;
1209 
1210    list_for_each_entry(struct anv_physical_device, pdevice,
1211                        &instance->physical_devices, link) {
1212       vk_outarray_append(&out, i) {
1213          *i = anv_physical_device_to_handle(pdevice);
1214       }
1215    }
1216 
1217    return vk_outarray_status(&out);
1218 }
1219 
anv_EnumeratePhysicalDeviceGroups(VkInstance _instance,uint32_t * pPhysicalDeviceGroupCount,VkPhysicalDeviceGroupProperties * pPhysicalDeviceGroupProperties)1220 VkResult anv_EnumeratePhysicalDeviceGroups(
1221     VkInstance                                  _instance,
1222     uint32_t*                                   pPhysicalDeviceGroupCount,
1223     VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
1224 {
1225    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1226    VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
1227                          pPhysicalDeviceGroupCount);
1228 
1229    VkResult result = anv_enumerate_physical_devices(instance);
1230    if (result != VK_SUCCESS)
1231       return result;
1232 
1233    list_for_each_entry(struct anv_physical_device, pdevice,
1234                        &instance->physical_devices, link) {
1235       vk_outarray_append(&out, p) {
1236          p->physicalDeviceCount = 1;
1237          memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
1238          p->physicalDevices[0] = anv_physical_device_to_handle(pdevice);
1239          p->subsetAllocation = false;
1240 
1241          vk_foreach_struct(ext, p->pNext)
1242             anv_debug_ignored_stype(ext->sType);
1243       }
1244    }
1245 
1246    return vk_outarray_status(&out);
1247 }
1248 
anv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,VkPhysicalDeviceFeatures * pFeatures)1249 void anv_GetPhysicalDeviceFeatures(
1250     VkPhysicalDevice                            physicalDevice,
1251     VkPhysicalDeviceFeatures*                   pFeatures)
1252 {
1253    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1254 
1255    /* Just pick one; they're all the same */
1256    const bool has_astc_ldr =
1257       isl_format_supports_sampling(&pdevice->info,
1258                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
1259 
1260    *pFeatures = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess                       = true,
      .fullDrawIndexUint32                      = true,
      .imageCubeArray                           = true,
      .independentBlend                         = true,
      .geometryShader                           = true,
      .tessellationShader                       = true,
      .sampleRateShading                        = true,
      .dualSrcBlend                             = true,
      .logicOp                                  = true,
      .multiDrawIndirect                        = true,
      .drawIndirectFirstInstance                = true,
      .depthClamp                               = true,
      .depthBiasClamp                           = true,
      .fillModeNonSolid                         = true,
      .depthBounds                              = pdevice->info.ver >= 12,
      .wideLines                                = true,
      .largePoints                              = true,
      .alphaToOne                               = true,
      .multiViewport                            = true,
      .samplerAnisotropy                        = true,
      .textureCompressionETC2                   = pdevice->info.ver >= 8 ||
                                                  pdevice->info.is_baytrail,
      .textureCompressionASTC_LDR               = has_astc_ldr,
      .textureCompressionBC                     = true,
      .occlusionQueryPrecise                    = true,
      .pipelineStatisticsQuery                  = true,
      .fragmentStoresAndAtomics                 = true,
      .shaderTessellationAndGeometryPointSize   = true,
      .shaderImageGatherExtended                = true,
      .shaderStorageImageExtendedFormats        = true,
      .shaderStorageImageMultisample            = false,
      .shaderStorageImageReadWithoutFormat      = false,
      .shaderStorageImageWriteWithoutFormat     = true,
      .shaderUniformBufferArrayDynamicIndexing  = true,
      .shaderSampledImageArrayDynamicIndexing   = true,
      .shaderStorageBufferArrayDynamicIndexing  = true,
      .shaderStorageImageArrayDynamicIndexing   = true,
      .shaderClipDistance                       = true,
      .shaderCullDistance                       = true,
      .shaderFloat64                            = pdevice->info.ver >= 8 &&
                                                  pdevice->info.has_64bit_float,
      .shaderInt64                              = pdevice->info.ver >= 8,
      .shaderInt16                              = pdevice->info.ver >= 8,
      .shaderResourceMinLod                     = pdevice->info.ver >= 9,
      .variableMultisampleRate                  = true,
      .inheritedQueries                         = true,
   };

   /* We can't do image stores in vec4 shaders */
   pFeatures->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it.  They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      pFeatures->depthBounds = true;
}

static void
anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
                                     VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess            = pdevice->info.ver >= 8;
   f->uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8;
   f->storagePushConstant16               = pdevice->info.ver >= 8;
   f->storageInputOutput16                = false;
   f->multiview                           = true;
   f->multiviewGeometryShader             = true;
   f->multiviewTessellationShader         = true;
   f->variablePointersStorageBuffer       = true;
   f->variablePointers                    = true;
   f->protectedMemory                     = false;
   f->samplerYcbcrConversion              = true;
   f->shaderDrawParameters                = true;
}

static void
anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
                                     VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge            = true;
   f->drawIndirectCount                   = true;
   f->storageBuffer8BitAccess             = pdevice->info.ver >= 8;
   f->uniformAndStorageBuffer8BitAccess   = pdevice->info.ver >= 8;
   f->storagePushConstant8                = pdevice->info.ver >= 8;
   f->shaderBufferInt64Atomics            = pdevice->info.ver >= 9 &&
                                            pdevice->use_softpin;
   f->shaderSharedInt64Atomics            = false;
   f->shaderFloat16                       = pdevice->info.ver >= 8;
   f->shaderInt8                          = pdevice->info.ver >= 8;

   bool descIndexing = pdevice->has_a64_buffer_access &&
                       pdevice->has_bindless_images;
   f->descriptorIndexing                                 = descIndexing;
   f->shaderInputAttachmentArrayDynamicIndexing          = false;
   f->shaderUniformTexelBufferArrayDynamicIndexing       = descIndexing;
   f->shaderStorageTexelBufferArrayDynamicIndexing       = descIndexing;
   f->shaderUniformBufferArrayNonUniformIndexing         = descIndexing;
   f->shaderSampledImageArrayNonUniformIndexing          = descIndexing;
   f->shaderStorageBufferArrayNonUniformIndexing         = descIndexing;
   f->shaderStorageImageArrayNonUniformIndexing          = descIndexing;
   f->shaderInputAttachmentArrayNonUniformIndexing       = false;
   f->shaderUniformTexelBufferArrayNonUniformIndexing    = descIndexing;
   f->shaderStorageTexelBufferArrayNonUniformIndexing    = descIndexing;
   f->descriptorBindingUniformBufferUpdateAfterBind      = descIndexing;
   f->descriptorBindingSampledImageUpdateAfterBind       = descIndexing;
   f->descriptorBindingStorageImageUpdateAfterBind       = descIndexing;
   f->descriptorBindingStorageBufferUpdateAfterBind      = descIndexing;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = descIndexing;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = descIndexing;
   f->descriptorBindingUpdateUnusedWhilePending          = descIndexing;
   f->descriptorBindingPartiallyBound                    = descIndexing;
   f->descriptorBindingVariableDescriptorCount           = descIndexing;
   f->runtimeDescriptorArray                             = descIndexing;

   f->samplerFilterMinmax                 = pdevice->info.ver >= 9;
   f->scalarBlockLayout                   = true;
   f->imagelessFramebuffer                = true;
   f->uniformBufferStandardLayout         = true;
   f->shaderSubgroupExtendedTypes         = true;
   f->separateDepthStencilLayouts         = true;
   f->hostQueryReset                      = true;
   f->timelineSemaphore                   = true;
   f->bufferDeviceAddress                 = pdevice->has_a64_buffer_access;
   f->bufferDeviceAddressCaptureReplay    = pdevice->has_a64_buffer_access;
   f->bufferDeviceAddressMultiDevice      = false;
   f->vulkanMemoryModel                   = true;
   f->vulkanMemoryModelDeviceScope        = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = true;
   f->shaderOutputViewportIndex           = true;
   f->shaderOutputLayer                   = true;
   f->subgroupBroadcastDynamicId          = true;
}

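/* A minimal application-side sketch (illustrative, not part of the driver)
 * of how the 1.1/1.2 feature structs filled in above get queried: the app
 * chains the version structs through VkPhysicalDeviceFeatures2::pNext and
 * the loop below copies the matching core features into each one.
 *
 *    VkPhysicalDeviceVulkan12Features vk12 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
 *    };
 *    VkPhysicalDeviceVulkan11Features vk11 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
 *       .pNext = &vk12,
 *    };
 *    VkPhysicalDeviceFeatures2 features2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
 *       .pNext = &vk11,
 *    };
 *    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
 *    if (vk12.descriptorIndexing) { ... }
 */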
void anv_GetPhysicalDeviceFeatures2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures2*                  pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   anv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   anv_get_physical_device_features_1_2(pdevice, &core_1_2);

   vk_foreach_struct(ext, pFeatures->pNext) {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
         VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
         features->accelerationStructure = false;
         features->accelerationStructureCaptureReplay = false;
         features->accelerationStructureIndirectBuild = false;
         features->accelerationStructureHostCommands = false;
         features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
         features->bufferDeviceAddress = pdevice->has_a64_buffer_access;
         features->bufferDeviceAddressCaptureReplay = false;
         features->bufferDeviceAddressMultiDevice = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = true;
         features->computeDerivativeGroupLinear = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
         features->conditionalRendering = pdevice->info.verx10 >= 75;
         features->inheritedConditionalRendering = pdevice->info.verx10 >= 75;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = pdevice->info.ver >= 8;
         features->customBorderColorWithoutFormat = pdevice->info.ver >= 8;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
         VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
            (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
         features->fragmentShaderSampleInterlock = pdevice->info.ver >= 9;
         features->fragmentShaderPixelInterlock = pdevice->info.ver >= 9;
         features->fragmentShaderShadingRateInterlock = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
         features->attachmentFragmentShadingRate = false;
         features->pipelineFragmentShadingRate = true;
         features->primitiveFragmentShadingRate = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
         VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
            (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
         features->robustImageAccess = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
         features->inlineUniformBlock = true;
         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = true;
         features->bresenhamLines = true;
         /* Support for Smooth lines with MSAA was removed on gfx11.  From the
          * BSpec section "Multisample ModesState" table for "AA Line Support
          * Requirements":
          *
          *    GFX10:BUG:########    NUM_MULTISAMPLES == 1
          *
          * Fortunately, this isn't a case most people care about.
          */
         features->smoothLines = pdevice->info.ver < 10;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = true;
         features->stippledSmoothLines = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR: {
         VkPhysicalDeviceMaintenance4FeaturesKHR *features =
            (VkPhysicalDeviceMaintenance4FeaturesKHR *)ext;
         features->maintenance4 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         feature->performanceCounterQueryPools = true;
         /* HW only supports a single configuration at a time. */
         feature->performanceCounterMultipleQueryPools = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
         VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
            (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
         features->pipelineCreationCacheControl = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
         VkPhysicalDevicePrivateDataFeaturesEXT *features = (void *)ext;
         features->privateData = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
         features->shaderBufferFloat32Atomics =    true;
         features->shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc;
         features->shaderBufferFloat64Atomics =    pdevice->info.has_lsc;
         features->shaderBufferFloat64AtomicAdd =  false;
         features->shaderSharedFloat32Atomics =    true;
         features->shaderSharedFloat32AtomicAdd =  false;
         features->shaderSharedFloat64Atomics =    false;
         features->shaderSharedFloat64AtomicAdd =  false;
         features->shaderImageFloat32Atomics =     true;
         features->shaderImageFloat32AtomicAdd =   false;
         features->sparseImageFloat32Atomics =     false;
         features->sparseImageFloat32AtomicAdd =   false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (void *)ext;
         features->shaderBufferFloat16Atomics      = false;
         features->shaderBufferFloat16AtomicAdd    = false;
         features->shaderBufferFloat16AtomicMinMax = false;
         features->shaderBufferFloat32AtomicMinMax = pdevice->info.ver >= 9;
         features->shaderBufferFloat64AtomicMinMax = pdevice->info.has_lsc;
         features->shaderSharedFloat16Atomics      = false;
         features->shaderSharedFloat16AtomicAdd    = false;
         features->shaderSharedFloat16AtomicMinMax = false;
         features->shaderSharedFloat32AtomicMinMax = pdevice->info.ver >= 9;
         features->shaderSharedFloat64AtomicMinMax = false;
         features->shaderImageFloat32AtomicMinMax  = false;
         features->sparseImageFloat32AtomicMinMax  = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
         VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features = (void *)ext;
         features->shaderDemoteToHelperInvocation = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
         VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
            (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
         features->shaderIntegerFunctions2 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR: {
         VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *features =
            (VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *)ext;
         features->shaderIntegerDotProduct = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
         VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
            (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
         features->shaderSubgroupUniformControlFlow = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
         VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
            (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
         features->shaderTerminateInvocation = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
            (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
         features->subgroupSizeControl = true;
         features->computeFullSubgroups = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
         VkPhysicalDeviceSynchronization2FeaturesKHR *features =
            (VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
         features->synchronization2 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
         VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
            (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
         features->workgroupMemoryExplicitLayout = true;
         features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
         features->workgroupMemoryExplicitLayout8BitAccess = true;
         features->workgroupMemoryExplicitLayout16BitAccess = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
         features->extendedDynamicState = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
         VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
            (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
         features->shaderZeroInitializeWorkgroupMemory = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
         VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
         features->multiDraw = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = true;
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256

#define MAX_CUSTOM_BORDER_COLORS                   4096

void anv_GetPhysicalDeviceProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties*                 pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct intel_device_info *devinfo = &pdevice->info;

   const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
   const uint32_t max_textures =
      pdevice->has_bindless_images ? UINT16_MAX : 128;
   const uint32_t max_samplers =
      pdevice->has_bindless_samplers ? UINT16_MAX :
      (devinfo->verx10 >= 75) ? 128 : 16;
   const uint32_t max_images =
      pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;

   /* If we can use bindless for everything, claim a high per-stage limit,
    * otherwise use the binding table size, minus the slots reserved for
    * render targets and one slot for the descriptor buffer.
    */
   const uint32_t max_per_stage =
      pdevice->has_bindless_images && pdevice->has_a64_buffer_access
      ? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
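   /* For a sense of scale, a worked example (the macro values are the ones
    * in anv_private.h at the time of writing and are illustrative, not
    * normative): with MAX_BINDING_TABLE_SIZE = 240 and MAX_RTS = 8, the
    * bindful path above comes out to 240 - 8 - 1 = 231 resources per stage.
    */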
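   /* The factor of 32 below assumes the widest SIMD32 compute dispatch,
    * i.e. up to 32 invocations per hardware thread (an inference from the
    * compiler's dispatch widths, not something stated in this file).
    */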
   const uint32_t max_workgroup_size = 32 * devinfo->max_cs_workgroup_threads;

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D                      = (1 << 14),
      .maxImageDimension2D                      = (1 << 14),
      .maxImageDimension3D                      = (1 << 11),
      .maxImageDimensionCube                    = (1 << 14),
      .maxImageArrayLayers                      = (1 << 11),
      .maxTexelBufferElements                   = 128 * 1024 * 1024,
      .maxUniformBufferRange                    = (1ul << 27),
      .maxStorageBufferRange                    = pdevice->isl_dev.max_buffer_size,
      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount                 = UINT32_MAX,
      .maxSamplerAllocationCount                = 64 * 1024,
      .bufferImageGranularity                   = 64, /* A cache line */
      .sparseAddressSpaceSize                   = 0,
      .maxBoundDescriptorSets                   = MAX_SETS,
      .maxPerStageDescriptorSamplers            = max_samplers,
      .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers      = max_ssbos,
      .maxPerStageDescriptorSampledImages       = max_textures,
      .maxPerStageDescriptorStorageImages       = max_images,
      .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources                     = max_per_stage,
      .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes                 = MAX_VBS,
      .maxVertexInputBindings                   = MAX_VBS,
      .maxVertexInputAttributeOffset            = 2047,
      .maxVertexInputBindingStride              = 2048,
      .maxVertexOutputComponents                = 128,
      .maxTessellationGenerationLevel           = 64,
      .maxTessellationPatchSize                 = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations             = 32,
      .maxGeometryInputComponents               = devinfo->ver >= 8 ? 128 : 64,
      .maxGeometryOutputComponents              = 128,
      .maxGeometryOutputVertices                = 256,
      .maxGeometryTotalOutputComponents         = 1024,
      .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments             = 8,
      .maxFragmentDualSrcAttachments            = 1,
      .maxFragmentCombinedOutputResources       = 8,
      .maxComputeSharedMemorySize               = 64 * 1024,
      .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations           = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits                    = 8,
      .subTexelPrecisionBits                    = 8,
      .mipmapPrecisionBits                      = 8,
      .maxDrawIndexedIndexValue                 = UINT32_MAX,
      .maxDrawIndirectCount                     = UINT32_MAX,
      .maxSamplerLodBias                        = 16,
      .maxSamplerAnisotropy                     = 16,
      .maxViewports                             = MAX_VIEWPORTS,
      .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
      .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits                     = 13, /* We take a float? */
      .minMemoryMapAlignment                    = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment            = 16,
      .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
      .minTexelOffset                           = -8,
      .maxTexelOffset                           = 7,
      .minTexelGatherOffset                     = -32,
      .maxTexelGatherOffset                     = 31,
      .minInterpolationOffset                   = -0.5,
      .maxInterpolationOffset                   = 0.4375,
      .subPixelInterpolationOffsetBits          = 4,
      .maxFramebufferWidth                      = (1 << 14),
      .maxFramebufferHeight                     = (1 << 14),
      .maxFramebufferLayers                     = (1 << 11),
      .framebufferColorSampleCounts             = sample_counts,
      .framebufferDepthSampleCounts             = sample_counts,
      .framebufferStencilSampleCounts           = sample_counts,
      .framebufferNoAttachmentsSampleCounts     = sample_counts,
      .maxColorAttachments                      = MAX_RTS,
      .sampledImageColorSampleCounts            = sample_counts,
      .sampledImageIntegerSampleCounts          = sample_counts,
      .sampledImageDepthSampleCounts            = sample_counts,
      .sampledImageStencilSampleCounts          = sample_counts,
      .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords                       = 1,
      .timestampComputeAndGraphics              = true,
      .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances                         = 8,
      .maxCullDistances                         = 8,
      .maxCombinedClipAndCullDistances          = 8,
      .discreteQueuePriorities                  = 2,
      .pointSizeRange                           = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange                           = { 0.0, 7.9921875 },
      .pointSizeGranularity                     = (1.0 / 8.0),
      .lineWidthGranularity                     = (1.0 / 128.0),
      .strictLines                              = false,
      .standardSampleLocations                  = true,
      .optimalBufferCopyOffsetAlignment         = 128,
      .optimalBufferCopyRowPitchAlignment       = 128,
      .nonCoherentAtomSize                      = 64,
   };

   *pProperties = (VkPhysicalDeviceProperties) {
      .apiVersion = ANV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->info.chipset_id,
      .deviceType = pdevice->info.has_local_mem ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
   };

   snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
            "%s", pdevice->info.name);
   memcpy(pProperties->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
}

static void
anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = BRW_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
      scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                       VK_SHADER_STAGE_MISS_BIT_KHR |
                       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT;
   if (pdevice->info.ver >= 8) {
      /* TODO: There's no technical reason why these can't be made to
       * work on gfx7 but they don't at the moment so it's better to leave
       * the feature disabled than enabled and broken.
       */
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                        VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
   }
   p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;

   p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount      = 16;
   p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
   p->protectedNoFault           = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors       = 1024;
   p->maxMemoryAllocationSize    = MAX_MEMORY_ALLOCATION_SIZE;
}

static void
anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* Don't advertise conformance with a particular version if the hardware's
    * support is incomplete/alpha.
    */
   if (pdevice->is_alpha) {
      p->conformanceVersion = (VkConformanceVersionKHR) {
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      };
   } else {
      p->conformanceVersion = (VkConformanceVersionKHR) {
         .major = 1,
         .minor = 2,
         .subminor = 0,
         .patch = 0,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = pdevice->info.ver > 8;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = true;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan.  The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address.  This means that we can have at most 1M surface states
    * allocated at any given time.  Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time.  The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
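   /* In concrete numbers: a 20-bit handle addresses 1 << 20 = 1,048,576
    * surface states, and at two descriptors per image view that is about
    * 1,048,576 / 2 = 524,288 views live at once; the limit advertised
    * below is the full 1 << 20.
    */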
   const unsigned max_bindless_views = 1 << 20;
   p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                      VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
                                      VK_RESOLVE_MODE_MIN_BIT_KHR |
                                      VK_RESOLVE_MODE_MAX_BIT_KHR;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
   if (pdevice->info.ver >= 8) {
      /* The advanced stencil resolve modes currently require stencil
       * sampling be supported by the hardware.
       */
      p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT_KHR |
                                         VK_RESOLVE_MODE_MAX_BIT_KHR;
   }
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = pdevice->info.ver >= 9;
   p->filterMinmaxImageComponentMapping   = pdevice->info.ver >= 9;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}
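/* Application-side sketch (illustrative only), mirroring the features query
 * above: chaining VkPhysicalDeviceVulkan12Properties lets an app read back
 * the driver identity and conformance version filled in by the helper above.
 *
 *    VkPhysicalDeviceVulkan12Properties vk12 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &vk12,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 *    printf("%s, conformance %u.%u.%u\n", vk12.driverInfo,
 *           vk12.conformanceVersion.major, vk12.conformanceVersion.minor,
 *           vk12.conformanceVersion.subminor);
 */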
void anv_GetPhysicalDeviceProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties2*                pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   anv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   anv_get_physical_device_properties_1_2(pdevice, &core_1_2);

   vk_foreach_struct(ext, pProperties->pNext) {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
         VkPhysicalDeviceAccelerationStructurePropertiesKHR *props = (void *)ext;
         props->maxGeometryCount = (1u << 24) - 1;
         props->maxInstanceCount = (1u << 24) - 1;
         props->maxPrimitiveCount = (1u << 29) - 1;
         props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
         props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
         props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
         props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
         props->minAccelerationStructureScratchOffsetAlignment = 64;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
         /* TODO: Real limits */
         VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
            (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
         /* There's nothing in the public docs about this value as far as I
          * can tell.  However, this is the value the Windows driver reports
          * and there's a comment on a rejected HW feature in the internal
          * docs that says:
          *
          *    "This is similar to conservative rasterization, except the
          *    primitive area is not extended by 1/512 and..."
          *
          * That's a bit of an obtuse reference but it's the best we've got
          * for now.
          */
         properties->primitiveOverestimationSize = 1.0f / 512.0f;
         properties->maxExtraPrimitiveOverestimationSize = 0.0f;
         properties->extraPrimitiveOverestimationSizeGranularity = 0.0f;
         properties->primitiveUnderestimation = false;
         properties->conservativePointAndLineRasterization = false;
         properties->degenerateTrianglesRasterized = true;
         properties->degenerateLinesRasterized = false;
         properties->fullyCoveredFragmentShaderInputVariable = false;
         properties->conservativeRasterizationPostDepthCoverage = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
         VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
            (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
         /* Those must be 0 if attachmentFragmentShadingRate is not
          * supported.
          */
         props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
         props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
         props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;

         props->primitiveFragmentShadingRateWithMultipleViewports = false;
         props->layeredShadingRateAttachments = false;
         props->fragmentShadingRateNonTrivialCombinerOps = false;
         props->maxFragmentSize = (VkExtent2D) { 4, 4 };
         props->maxFragmentSizeAspectRatio = 4;
         props->maxFragmentShadingRateCoverageSamples = 4 * 4 * 16;
         props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT;
         props->fragmentShadingRateWithShaderDepthStencilWrites = false;
         props->fragmentShadingRateWithSampleMask = true;
         props->fragmentShadingRateWithShaderSampleMask = false;
         props->fragmentShadingRateWithConservativeRasterization = true;
         props->fragmentShadingRateWithFragmentShaderInterlock = true;
         props->fragmentShadingRateWithCustomSampleLocations = true;
         props->fragmentShadingRateStrictMultiplyCombiner = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props =
            (VkPhysicalDeviceDrmPropertiesEXT *)ext;

         props->hasPrimary = pdevice->has_master;
         props->primaryMajor = pdevice->master_major;
         props->primaryMinor = pdevice->master_minor;

         props->hasRender = pdevice->has_local;
         props->renderMajor = pdevice->local_major;
         props->renderMinor = pdevice->local_minor;

         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
            (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
         /* Userptr needs page aligned memory. */
         props->minImportedHostPointerAlignment = 4096;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
         props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
         props->maxPerStageDescriptorInlineUniformBlocks =
            MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
         props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
            MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
         props->maxDescriptorSetInlineUniformBlocks =
            MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
         props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
            MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
          * Sampling Rules - Legacy Mode", it says the following:
          *
          *    "Note that the device divides a pixel into a 16x16 array of
          *    subpixels, referenced by their upper left corners."
          *
          * This is the only known reference in the PRMs to the subpixel
          * precision of line rasterization and a "16x16 array of subpixels"
          * implies 4 subpixel precision bits.  Empirical testing has shown
          * that 4 subpixel precision bits applies to all line rasterization
          * types.
          */
         props->lineSubPixelPrecisionBits = 4;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: {
         VkPhysicalDeviceMaintenance4PropertiesKHR *properties =
            (VkPhysicalDeviceMaintenance4PropertiesKHR *)ext;
         properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
         VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
            (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
         properties->pciDomain = pdevice->pci_info.domain;
         properties->pciBus = pdevice->pci_info.bus;
         properties->pciDevice = pdevice->pci_info.device;
         properties->pciFunction = pdevice->pci_info.function;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         /* We could support this by spawning a shader to do the equation
          * normalization.
          */
         properties->allowCommandBufferQueryCopies = false;
         break;
      }

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch"
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
         VkPhysicalDevicePresentationPropertiesANDROID *props =
            (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
         props->sharedImage = VK_FALSE;
         break;
      }
#pragma GCC diagnostic pop

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         properties->provokingVertexModePerPipeline = true;
         properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
         properties->robustStorageBufferAccessSizeAlignment =
            ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
         properties->robustUniformBufferAccessSizeAlignment =
            ANV_UBO_ALIGNMENT;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR: {
         VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *props =
            (VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *)ext;

         props->integerDotProduct8BitUnsignedAccelerated = false;
         props->integerDotProduct8BitSignedAccelerated = false;
         props->integerDotProduct8BitMixedSignednessAccelerated = false;
         props->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
         props->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
         props->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
         props->integerDotProduct16BitUnsignedAccelerated = false;
         props->integerDotProduct16BitSignedAccelerated = false;
         props->integerDotProduct16BitMixedSignednessAccelerated = false;
         props->integerDotProduct32BitUnsignedAccelerated = false;
         props->integerDotProduct32BitSignedAccelerated = false;
         props->integerDotProduct32BitMixedSignednessAccelerated = false;
         props->integerDotProduct64BitUnsignedAccelerated = false;
         props->integerDotProduct64BitSignedAccelerated = false;
         props->integerDotProduct64BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
         props->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
         props->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
         props->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
         props->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
         props->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
            (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
         STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
         props->minSubgroupSize = 8;
         props->maxSubgroupSize = 32;
         props->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
         props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;

         props->sampleLocationSampleCounts =
            isl_device_get_sample_counts(&pdevice->isl_dev);

         /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
         props->maxSampleLocationGridSize.width = 1;
         props->maxSampleLocationGridSize.height = 1;

         props->sampleLocationCoordinateRange[0] = 0;
         props->sampleLocationCoordinateRange[1] = 0.9375;
         props->sampleLocationSubPixelBits = 4;

         props->variableSampleLocations = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *props =
            (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;

         /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
          * Base Address:
          *
          *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
2436           *    specifies the base address of the first element of the surface,
2437           *    computed in software by adding the surface base address to the
2438           *    byte offset of the element in the buffer. The base address must
2439           *    be aligned to element size."
2440           *
2441           * The typed dataport messages require that things be texel aligned.
2442           * Otherwise, we may just load/store the wrong data or, in the worst
2443           * case, there may be hangs.
2444           */
2445          props->storageTexelBufferOffsetAlignmentBytes = 16;
2446          props->storageTexelBufferOffsetSingleTexelAlignment = true;
2447 
2448          /* The sampler, however, is much more forgiving and it can handle
2449           * arbitrary byte alignment for linear and buffer surfaces.  It's
2450           * hard to find a good PRM citation for this but years of empirical
2451           * experience demonstrate that this is true.
2452           */
2453          props->uniformTexelBufferOffsetAlignmentBytes = 1;
2454          props->uniformTexelBufferOffsetSingleTexelAlignment = false;
2455          break;
2456       }
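      /* Illustration (not driver code; raw_offset is a hypothetical
       * unaligned byte offset): a client honoring the limits above would
       * round its storage texel buffer offsets up to the reported
       * power-of-two alignment before binding:
       *
       *    VkDeviceSize a = props->storageTexelBufferOffsetAlignmentBytes;
       *    VkDeviceSize offset = (raw_offset + a - 1) & ~(a - 1);
       */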
2457 
2458       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2459          VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2460             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2461 
2462          props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2463          props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2464          props->maxTransformFeedbackBufferSize = (1ull << 32);
2465          props->maxTransformFeedbackStreamDataSize = 128 * 4;
2466          props->maxTransformFeedbackBufferDataSize = 128 * 4;
2467          props->maxTransformFeedbackBufferDataStride = 2048;
2468          props->transformFeedbackQueries = true;
2469          props->transformFeedbackStreamsLinesTriangles = false;
2470          props->transformFeedbackRasterizationStreamSelect = false;
2471          /* This requires MI_MATH */
2472          props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
2473          break;
2474       }
2475 
2476       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2477          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2478             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2479          /* We have to restrict this a bit for multiview */
2480          props->maxVertexAttribDivisor = UINT32_MAX / 16;
2481          break;
2482       }
2483 
2484       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2485          VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2486          props->maxMultiDrawCount = 2048;
2487          break;
2488       }
2489 
2490       default:
2491          anv_debug_ignored_stype(ext->sType);
2492          break;
2493       }
2494    }
2495 }
2496 
2497 static const VkQueueFamilyProperties
2498 anv_queue_family_properties_template = {
2499    .timestampValidBits = 36, /* XXX: Real value here */
2500    .minImageTransferGranularity = { 1, 1, 1 },
2501 };
2502 
2503 void anv_GetPhysicalDeviceQueueFamilyProperties(
2504     VkPhysicalDevice                            physicalDevice,
2505     uint32_t*                                   pCount,
2506     VkQueueFamilyProperties*                    pQueueFamilyProperties)
2507 {
2508    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2509    VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
2510 
2511    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2512       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2513       vk_outarray_append(&out, p) {
2514          *p = anv_queue_family_properties_template;
2515          p->queueFlags = queue_family->queueFlags;
2516          p->queueCount = queue_family->queueCount;
2517       }
2518    }
2519 }
2520 
2521 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2522     VkPhysicalDevice                            physicalDevice,
2523     uint32_t*                                   pQueueFamilyPropertyCount,
2524     VkQueueFamilyProperties2*                   pQueueFamilyProperties)
2525 {
2526    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2527    VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
2528 
2529    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2530       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2531       vk_outarray_append(&out, p) {
2532          p->queueFamilyProperties = anv_queue_family_properties_template;
2533          p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2534          p->queueFamilyProperties.queueCount = queue_family->queueCount;
2535 
2536          vk_foreach_struct(s, p->pNext) {
2537             anv_debug_ignored_stype(s->sType);
2538          }
2539       }
2540    }
2541 }
2542 
2543 void anv_GetPhysicalDeviceMemoryProperties(
2544     VkPhysicalDevice                            physicalDevice,
2545     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
2546 {
2547    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2548 
2549    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2550    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2551       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2552          .propertyFlags = physical_device->memory.types[i].propertyFlags,
2553          .heapIndex     = physical_device->memory.types[i].heapIndex,
2554       };
2555    }
2556 
2557    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2558    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2559       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2560          .size    = physical_device->memory.heaps[i].size,
2561          .flags   = physical_device->memory.heaps[i].flags,
2562       };
2563    }
2564 }
2565 
2566 static void
2567 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2568                       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2569 {
2570    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2571 
2572    anv_update_meminfo(device, device->local_fd);
2573 
2574    VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2575    for (size_t i = 0; i < device->memory.heap_count; i++) {
2576       if (device->memory.heaps[i].is_local_mem) {
2577          total_vram_heaps_size += device->memory.heaps[i].size;
2578       } else {
2579          total_sys_heaps_size += device->memory.heaps[i].size;
2580       }
2581    }
2582 
2583    for (size_t i = 0; i < device->memory.heap_count; i++) {
2584       VkDeviceSize heap_size = device->memory.heaps[i].size;
2585       VkDeviceSize heap_used = device->memory.heaps[i].used;
2586       VkDeviceSize heap_budget, total_heaps_size;
2587       uint64_t mem_available = 0;
2588 
2589       if (device->memory.heaps[i].is_local_mem) {
2590          total_heaps_size = total_vram_heaps_size;
2591          mem_available = device->vram.available;
2592       } else {
2593          total_heaps_size = total_sys_heaps_size;
2594          mem_available = device->sys.available;
2595       }
2596 
2597       double heap_proportion = (double) heap_size / total_heaps_size;
2598       VkDeviceSize available_prop = mem_available * heap_proportion;
2599 
2600       /*
2601        * Let's not incite the app to starve the system: report at most 90% of
2602        * the available heap memory.
2603        */
2604       uint64_t heap_available = available_prop * 9 / 10;
2605       heap_budget = MIN2(heap_size, heap_used + heap_available);
2606 
2607       /*
2608        * Round down to the nearest MB
2609        */
2610       heap_budget &= ~((1ull << 20) - 1);
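      /*
       * E.g. with the 20-bit mask above, a budget of 0x12345678 bytes is
       * rounded down to 0x12300000 bytes (291 MiB).
       */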
2611 
2612       /*
2613        * The heapBudget value must be non-zero for array elements less than
2614        * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2615        * value must be less than or equal to VkMemoryHeap::size for each heap.
2616        */
2617       assert(0 < heap_budget && heap_budget <= heap_size);
2618 
2619       memoryBudget->heapUsage[i] = heap_used;
2620       memoryBudget->heapBudget[i] = heap_budget;
2621    }
2622 
2623    /* The heapBudget and heapUsage values must be zero for array elements
2624     * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2625     */
2626    for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2627       memoryBudget->heapBudget[i] = 0;
2628       memoryBudget->heapUsage[i] = 0;
2629    }
2630 }
2631 
2632 void anv_GetPhysicalDeviceMemoryProperties2(
2633     VkPhysicalDevice                            physicalDevice,
2634     VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
2635 {
2636    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2637                                          &pMemoryProperties->memoryProperties);
2638 
2639    vk_foreach_struct(ext, pMemoryProperties->pNext) {
2640       switch (ext->sType) {
2641       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2642          anv_get_memory_budget(physicalDevice, (void*)ext);
2643          break;
2644       default:
2645          anv_debug_ignored_stype(ext->sType);
2646          break;
2647       }
2648    }
2649 }
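/* Usage sketch (application side, not part of the driver): the budget path
 * above is reached by chaining the EXT struct into the core query:
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(physical_device, &props2);
 */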
2650 
2651 void
2652 anv_GetDeviceGroupPeerMemoryFeatures(
2653     VkDevice                                    device,
2654     uint32_t                                    heapIndex,
2655     uint32_t                                    localDeviceIndex,
2656     uint32_t                                    remoteDeviceIndex,
2657     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
2658 {
2659    assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
2660    *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2661                           VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2662                           VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2663                           VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2664 }
2665 
2666 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2667     VkInstance                                  _instance,
2668     const char*                                 pName)
2669 {
2670    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2671    return vk_instance_get_proc_addr(&instance->vk,
2672                                     &anv_instance_entrypoints,
2673                                     pName);
2674 }
2675 
2676 /* With version 1+ of the loader interface the ICD should expose
2677  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2678  */
2679 PUBLIC
2680 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2681     VkInstance                                  instance,
2682     const char*                                 pName);
2683 
2684 PUBLIC
2685 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2686     VkInstance                                  instance,
2687     const char*                                 pName)
2688 {
2689    return anv_GetInstanceProcAddr(instance, pName);
2690 }
2691 
2692 /* With version 4+ of the loader interface the ICD should expose
2693  * vk_icdGetPhysicalDeviceProcAddr()
2694  */
2695 PUBLIC
2696 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
2697     VkInstance  _instance,
2698     const char* pName);
2699 
2700 PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(
2701     VkInstance  _instance,
2702     const char* pName)
2703 {
2704    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2705    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2706 }
2707 
2708 static struct anv_state
2709 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2710 {
2711    struct anv_state state;
2712 
2713    state = anv_state_pool_alloc(pool, size, align);
2714    memcpy(state.map, p, size);
2715 
2716    return state;
2717 }
2718 
2719 static void
2720 anv_device_init_border_colors(struct anv_device *device)
2721 {
2722    if (device->info.is_haswell) {
2723       static const struct hsw_border_color border_colors[] = {
2724          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2725          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2726          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2727          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2728          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2729          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2730       };
2731 
2732       device->border_colors =
2733          anv_state_pool_emit_data(&device->dynamic_state_pool,
2734                                   sizeof(border_colors), 512, border_colors);
2735    } else {
2736       static const struct gfx8_border_color border_colors[] = {
2737          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2738          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2739          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2740          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2741          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2742          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2743       };
2744 
2745       device->border_colors =
2746          anv_state_pool_emit_data(&device->dynamic_state_pool,
2747                                   sizeof(border_colors), 64, border_colors);
2748    }
2749 }
2750 
2751 static VkResult
2752 anv_device_init_trivial_batch(struct anv_device *device)
2753 {
2754    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2755                                          ANV_BO_ALLOC_MAPPED,
2756                                          0 /* explicit_address */,
2757                                          &device->trivial_batch_bo);
2758    if (result != VK_SUCCESS)
2759       return result;
2760 
2761    struct anv_batch batch = {
2762       .start = device->trivial_batch_bo->map,
2763       .next = device->trivial_batch_bo->map,
2764       .end = device->trivial_batch_bo->map + 4096,
2765    };
2766 
2767    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2768    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2769 
2770    if (!device->info.has_llc)
2771       intel_clflush_range(batch.start, batch.next - batch.start);
2772 
2773    return VK_SUCCESS;
2774 }
2775 
2776 static int
2777 vk_priority_to_gen(int priority)
2778 {
2779    switch (priority) {
2780    case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2781       return INTEL_CONTEXT_LOW_PRIORITY;
2782    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2783       return INTEL_CONTEXT_MEDIUM_PRIORITY;
2784    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2785       return INTEL_CONTEXT_HIGH_PRIORITY;
2786    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2787       return INTEL_CONTEXT_REALTIME_PRIORITY;
2788    default:
2789       unreachable("Invalid priority");
2790    }
2791 }
2792 
2793 static bool
2794 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2795                  struct anv_block_pool *pool,
2796                  uint64_t address)
2797 {
2798    anv_block_pool_foreach_bo(bo, pool) {
2799       uint64_t bo_address = intel_48b_address(bo->offset);
2800       if (address >= bo_address && address < (bo_address + bo->size)) {
2801          *ret = (struct intel_batch_decode_bo) {
2802             .addr = bo_address,
2803             .size = bo->size,
2804             .map = bo->map,
2805          };
2806          return true;
2807       }
2808    }
2809    return false;
2810 }
2811 
2812 /* Batch decoder callback: find the BO that contains a given GPU address. */
2813 static struct intel_batch_decode_bo
2814 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2815 {
2816    struct anv_device *device = v_batch;
2817    struct intel_batch_decode_bo ret_bo = {};
2818 
2819    assert(ppgtt);
2820 
2821    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2822       return ret_bo;
2823    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2824       return ret_bo;
2825    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2826       return ret_bo;
2827    if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2828       return ret_bo;
2829 
2830    if (!device->cmd_buffer_being_decoded)
2831       return (struct intel_batch_decode_bo) { };
2832 
2833    struct anv_batch_bo **bo;
2834 
2835    u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2836       /* The decoder zeroes out the top 16 bits, so we need to as well */
2837       uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
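      /* (~0ull >> 16) == 0x0000ffffffffffff, i.e. keep only the low 48
       * address bits.
       */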
2838 
2839       if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2840          return (struct intel_batch_decode_bo) {
2841             .addr = bo_address,
2842             .size = (*bo)->bo->size,
2843             .map = (*bo)->bo->map,
2844          };
2845       }
2846    }
2847 
2848    return (struct intel_batch_decode_bo) { };
2849 }
2850 
2851 struct intel_aux_map_buffer {
2852    struct intel_buffer base;
2853    struct anv_state state;
2854 };
2855 
2856 static struct intel_buffer *
2857 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2858 {
2859    struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
2860    if (!buf)
2861       return NULL;
2862 
2863    struct anv_device *device = (struct anv_device*)driver_ctx;
2864    assert(device->physical->supports_48bit_addresses &&
2865           device->physical->use_softpin);
2866 
2867    struct anv_state_pool *pool = &device->dynamic_state_pool;
2868    buf->state = anv_state_pool_alloc(pool, size, size);
2869 
2870    buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
2871    buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
2872    buf->base.map = buf->state.map;
2873    buf->base.driver_bo = &buf->state;
2874    return &buf->base;
2875 }
2876 
2877 static void
2878 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2879 {
2880    struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
2881    struct anv_device *device = (struct anv_device*)driver_ctx;
2882    struct anv_state_pool *pool = &device->dynamic_state_pool;
2883    anv_state_pool_free(pool, buf->state);
2884    free(buf);
2885 }
2886 
2887 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2888    .alloc = intel_aux_map_buffer_alloc,
2889    .free = intel_aux_map_buffer_free,
2890 };
2891 
2892 VkResult anv_CreateDevice(
2893     VkPhysicalDevice                            physicalDevice,
2894     const VkDeviceCreateInfo*                   pCreateInfo,
2895     const VkAllocationCallbacks*                pAllocator,
2896     VkDevice*                                   pDevice)
2897 {
2898    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2899    VkResult result;
2900    struct anv_device *device;
2901 
2902    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2903 
2904    /* Check enabled features */
2905    bool robust_buffer_access = false;
2906    if (pCreateInfo->pEnabledFeatures) {
2907       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
2908          robust_buffer_access = true;
2909    }
2910 
2911    vk_foreach_struct_const(ext, pCreateInfo->pNext) {
2912       switch (ext->sType) {
2913       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
2914          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
2915          if (features->features.robustBufferAccess)
2916             robust_buffer_access = true;
2917          break;
2918       }
2919 
2920       default:
2921          /* Don't warn */
2922          break;
2923       }
2924    }
2925 
2926    /* Check the requested queues and fail if we are asked to create any
2927     * queues with flags we don't support.
2928     */
2929    assert(pCreateInfo->queueCreateInfoCount > 0);
2930    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2931       if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2932          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2933    }
2934 
2935    /* Check if client specified queue priority. */
2936    const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority =
2937       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2938                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
2939 
2940    VkQueueGlobalPriorityEXT priority =
2941       queue_priority ? queue_priority->globalPriority :
2942          VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
2943 
2944    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2945                        sizeof(*device), 8,
2946                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2947    if (!device)
2948       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2949 
2950    struct vk_device_dispatch_table dispatch_table;
2951    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2952       anv_genX(&physical_device->info, device_entrypoints), true);
2953    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2954       &anv_device_entrypoints, false);
2955    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2956       &wsi_device_entrypoints, false);
2957 
2958    result = vk_device_init(&device->vk, &physical_device->vk,
2959                            &dispatch_table, pCreateInfo, pAllocator);
2960    if (result != VK_SUCCESS)
2961       goto fail_alloc;
2962 
2963    if (INTEL_DEBUG(DEBUG_BATCH)) {
2964       const unsigned decode_flags =
2965          INTEL_BATCH_DECODE_FULL |
2966          (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
2967          INTEL_BATCH_DECODE_OFFSETS |
2968          INTEL_BATCH_DECODE_FLOATS;
2969 
2970       intel_batch_decode_ctx_init(&device->decoder_ctx,
2971                                   &physical_device->info,
2972                                   stderr, decode_flags, NULL,
2973                                   decode_get_bo, NULL, device);
2974    }
2975 
2976    device->physical = physical_device;
2977    device->_lost = false;
2978 
2979    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2980    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2981    if (device->fd == -1) {
2982       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2983       goto fail_device;
2984    }
2985 
2986    uint32_t num_queues = 0;
2987    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2988       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2989 
2990    if (device->physical->engine_info) {
2991       /* The kernel API supports at most 64 engines */
2992       assert(num_queues <= 64);
2993       uint16_t engine_classes[64];
2994       int engine_count = 0;
2995       for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2996          const VkDeviceQueueCreateInfo *queueCreateInfo =
2997             &pCreateInfo->pQueueCreateInfos[i];
2998 
2999          assert(queueCreateInfo->queueFamilyIndex <
3000                 physical_device->queue.family_count);
3001          struct anv_queue_family *queue_family =
3002             &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
3003 
3004          for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
3005             engine_classes[engine_count++] = queue_family->engine_class;
3006       }
3007       device->context_id =
3008          anv_gem_create_context_engines(device,
3009                                         physical_device->engine_info,
3010                                         engine_count, engine_classes);
3011    } else {
3012       assert(num_queues == 1);
3013       device->context_id = anv_gem_create_context(device);
3014    }
3015    if (device->context_id == -1) {
3016       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3017       goto fail_fd;
3018    }
3019 
3020    /* Here we tell the kernel not to attempt to recover our context but
3021     * immediately (on the next batchbuffer submission) report that the
3022     * context is lost, and we will do the recovery ourselves.  In the case
3023     * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
3024     * the client clean up the pieces.
3025     */
3026    anv_gem_set_context_param(device->fd, device->context_id,
3027                              I915_CONTEXT_PARAM_RECOVERABLE, false);
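   /* A sketch of what this boils down to at the i915 uAPI level (assuming
    * the usual DRM ioctl wrapper; the driver routes it through
    * anv_gem_set_context_param above):
    *
    *    struct drm_i915_gem_context_param p = {
    *       .ctx_id = device->context_id,
    *       .param  = I915_CONTEXT_PARAM_RECOVERABLE,
    *       .value  = false,
    *    };
    *    drmIoctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);
    */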
3028 
3029    device->has_thread_submit = physical_device->has_thread_submit;
3030 
3031    device->queues =
3032       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3033                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3034    if (device->queues == NULL) {
3035       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3036       goto fail_context_id;
3037    }
3038 
3039    device->queue_count = 0;
3040    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3041       const VkDeviceQueueCreateInfo *queueCreateInfo =
3042          &pCreateInfo->pQueueCreateInfos[i];
3043 
3044       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3045          /* When using legacy contexts, we use I915_EXEC_RENDER but, with
3046           * engine-based contexts, the bottom 6 bits of exec_flags are used
3047           * for the engine ID.
3048           */
3049          uint32_t exec_flags = device->physical->engine_info ?
3050                                device->queue_count : I915_EXEC_RENDER;
3051 
3052          result = anv_queue_init(device, &device->queues[device->queue_count],
3053                                  exec_flags, queueCreateInfo, j);
3054          if (result != VK_SUCCESS)
3055             goto fail_queues;
3056 
3057          device->queue_count++;
3058       }
3059    }
3060 
3061    if (physical_device->use_softpin) {
3062       if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3063          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3064          goto fail_queues;
3065       }
3066 
3067       /* keep the page with address zero out of the allocator */
3068       util_vma_heap_init(&device->vma_lo,
3069                          LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3070 
3071       util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3072                          CLIENT_VISIBLE_HEAP_SIZE);
3073 
3074       /* Leave the last 4GiB out of the high vma range, so that no state
3075        * base address + size can overflow 48 bits. For more information see
3076        * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3077        */
3078       util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3079                          physical_device->gtt_size - (1ull << 32) -
3080                          HIGH_HEAP_MIN_ADDRESS);
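      /* E.g. with a full 48-bit GTT, vma_hi then spans
       * [HIGH_HEAP_MIN_ADDRESS, 2^48 - 2^32), leaving the top 4GiB unused.
       */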
3081    }
3082 
3083    list_inithead(&device->memory_objects);
3084 
3085    /* As per spec, the driver implementation may deny requests to acquire
3086     * a priority above the default priority (MEDIUM) if the caller does not
3087     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
3088     * is returned.
3089     */
3090    if (physical_device->has_context_priority) {
3091       int err = anv_gem_set_context_param(device->fd, device->context_id,
3092                                           I915_CONTEXT_PARAM_PRIORITY,
3093                                           vk_priority_to_gen(priority));
3094       if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT) {
3095          result = vk_error(device, VK_ERROR_NOT_PERMITTED_EXT);
3096          goto fail_vmas;
3097       }
3098    }
3099 
3100    device->info = physical_device->info;
3101    device->isl_dev = physical_device->isl_dev;
3102 
3103    /* On Broadwell and later, we can use batch chaining to more efficiently
3104     * implement growing command buffers.  Prior to Haswell, the kernel
3105     * command parser gets in the way and we have to fall back to growing
3106     * the batch.
3107     */
3108    device->can_chain_batches = device->info.ver >= 8;
3109 
3110    device->robust_buffer_access = robust_buffer_access;
3111 
3112    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3113       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3114       goto fail_queues;
3115    }
3116 
3117    pthread_condattr_t condattr;
3118    if (pthread_condattr_init(&condattr) != 0) {
3119       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3120       goto fail_mutex;
3121    }
3122    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3123       pthread_condattr_destroy(&condattr);
3124       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3125       goto fail_mutex;
3126    }
3127    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3128       pthread_condattr_destroy(&condattr);
3129       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3130       goto fail_mutex;
3131    }
3132    pthread_condattr_destroy(&condattr);
3133 
3134    result = anv_bo_cache_init(&device->bo_cache, device);
3135    if (result != VK_SUCCESS)
3136       goto fail_queue_cond;
3137 
3138    anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
3139 
3140    /* Because scratch is also relative to General State Base Address, we leave
3141     * the base address 0 and start the pool memory at an offset.  This way we
3142     * get the correct offsets in the anv_states that get allocated from it.
3143     */
3144    result = anv_state_pool_init(&device->general_state_pool, device,
3145                                 "general pool",
3146                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
3147    if (result != VK_SUCCESS)
3148       goto fail_batch_bo_pool;
3149 
3150    result = anv_state_pool_init(&device->dynamic_state_pool, device,
3151                                 "dynamic pool",
3152                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
3153    if (result != VK_SUCCESS)
3154       goto fail_general_state_pool;
3155 
3156    if (device->info.ver >= 8) {
3157       /* The border color pointer is limited to 24 bits, so we need to make
3158        * sure that any such color used at any point in the program doesn't
3159        * exceed that limit.
3160        * We achieve that by reserving all the custom border colors we support
3161        * right off the bat, so they are close to the base address.
3162        */
3163       anv_state_reserved_pool_init(&device->custom_border_colors,
3164                                    &device->dynamic_state_pool,
3165                                    MAX_CUSTOM_BORDER_COLORS,
3166                                    sizeof(struct gfx8_border_color), 64);
3167    }
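   /* For scale: a 24-bit pointer reaches at most 2^24 bytes (16 MiB) past
    * the dynamic state base address, so reserving the colors first keeps
    * them well inside that window.
    */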
3168 
3169    result = anv_state_pool_init(&device->instruction_state_pool, device,
3170                                 "instruction pool",
3171                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
3172    if (result != VK_SUCCESS)
3173       goto fail_dynamic_state_pool;
3174 
3175    result = anv_state_pool_init(&device->surface_state_pool, device,
3176                                 "surface state pool",
3177                                 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3178    if (result != VK_SUCCESS)
3179       goto fail_instruction_state_pool;
3180 
3181    if (physical_device->use_softpin) {
3182       int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3183                                (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
3184       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3185       result = anv_state_pool_init(&device->binding_table_pool, device,
3186                                    "binding table pool",
3187                                    SURFACE_STATE_POOL_MIN_ADDRESS,
3188                                    bt_pool_offset, 4096);
3189       if (result != VK_SUCCESS)
3190          goto fail_surface_state_pool;
3191    }
3192 
3193    if (device->info.has_aux_map) {
3194       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3195                                                &physical_device->info);
3196       if (!device->aux_map_ctx)
3197          goto fail_binding_table_pool;
3198    }
3199 
3200    result = anv_device_alloc_bo(device, "workaround", 4096,
3201                                 ANV_BO_ALLOC_CAPTURE |
3202                                 ANV_BO_ALLOC_MAPPED |
3203                                 ANV_BO_ALLOC_LOCAL_MEM,
3204                                 0 /* explicit_address */,
3205                                 &device->workaround_bo);
3206    if (result != VK_SUCCESS)
3207       goto fail_surface_aux_map_pool;
3208 
3209    device->workaround_address = (struct anv_address) {
3210       .bo = device->workaround_bo,
3211       .offset = align_u32(
3212          intel_debug_write_identifiers(device->workaround_bo->map,
3213                                        device->workaround_bo->size,
3214                                        "Anv") + 8, 8),
3215    };
3216 
3217    device->debug_frame_desc =
3218       intel_debug_get_identifier_block(device->workaround_bo->map,
3219                                        device->workaround_bo->size,
3220                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
3221 
3222    result = anv_device_init_trivial_batch(device);
3223    if (result != VK_SUCCESS)
3224       goto fail_workaround_bo;
3225 
3226    /* Allocate a null surface state at surface state offset 0.  This makes
3227     * NULL descriptor handling trivial because we can just memset structures
3228     * to zero and they have a valid descriptor.
3229     */
3230    device->null_surface_state =
3231       anv_state_pool_alloc(&device->surface_state_pool,
3232                            device->isl_dev.ss.size,
3233                            device->isl_dev.ss.align);
3234    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3235                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3236    assert(device->null_surface_state.offset == 0);
3237 
3238    anv_scratch_pool_init(device, &device->scratch_pool);
3239 
3240    /* TODO(RT): Do we want some sort of data structure for this? */
3241    memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3242 
3243    result = anv_genX(&device->info, init_device_state)(device);
3244    if (result != VK_SUCCESS)
3245       goto fail_trivial_batch_bo_and_scratch_pool;
3246 
3247    anv_pipeline_cache_init(&device->default_pipeline_cache, device,
3248                            true /* cache_enabled */, false /* external_sync */);
3249 
3250    result = anv_device_init_rt_shaders(device);
3251    if (result != VK_SUCCESS)
3252       goto fail_rt_trampoline;
3253 
3254    anv_device_init_blorp(device);
3255 
3256    anv_device_init_border_colors(device);
3257 
3258    anv_device_perf_init(device);
3259 
3260    *pDevice = anv_device_to_handle(device);
3261 
3262    return VK_SUCCESS;
3263 
3264  fail_rt_trampoline:
3265    anv_pipeline_cache_finish(&device->default_pipeline_cache);
3266  fail_trivial_batch_bo_and_scratch_pool:
3267    anv_scratch_pool_finish(device, &device->scratch_pool);
3268    anv_device_release_bo(device, device->trivial_batch_bo);
3269  fail_workaround_bo:
3270    anv_device_release_bo(device, device->workaround_bo);
3271  fail_surface_aux_map_pool:
3272    if (device->info.has_aux_map) {
3273       intel_aux_map_finish(device->aux_map_ctx);
3274       device->aux_map_ctx = NULL;
3275    }
3276  fail_binding_table_pool:
3277    if (physical_device->use_softpin)
3278       anv_state_pool_finish(&device->binding_table_pool);
3279  fail_surface_state_pool:
3280    anv_state_pool_finish(&device->surface_state_pool);
3281  fail_instruction_state_pool:
3282    anv_state_pool_finish(&device->instruction_state_pool);
3283  fail_dynamic_state_pool:
3284    if (device->info.ver >= 8)
3285       anv_state_reserved_pool_finish(&device->custom_border_colors);
3286    anv_state_pool_finish(&device->dynamic_state_pool);
3287  fail_general_state_pool:
3288    anv_state_pool_finish(&device->general_state_pool);
3289  fail_batch_bo_pool:
3290    anv_bo_pool_finish(&device->batch_bo_pool);
3291    anv_bo_cache_finish(&device->bo_cache);
3292  fail_queue_cond:
3293    pthread_cond_destroy(&device->queue_submit);
3294  fail_mutex:
3295    pthread_mutex_destroy(&device->mutex);
3296  fail_vmas:
3297    if (physical_device->use_softpin) {
3298       util_vma_heap_finish(&device->vma_hi);
3299       util_vma_heap_finish(&device->vma_cva);
3300       util_vma_heap_finish(&device->vma_lo);
3301    }
3302  fail_queues:
3303    for (uint32_t i = 0; i < device->queue_count; i++)
3304       anv_queue_finish(&device->queues[i]);
3305    vk_free(&device->vk.alloc, device->queues);
3306  fail_context_id:
3307    anv_gem_destroy_context(device, device->context_id);
3308  fail_fd:
3309    close(device->fd);
3310  fail_device:
3311    vk_device_finish(&device->vk);
3312  fail_alloc:
3313    vk_free(&device->vk.alloc, device);
3314 
3315    return result;
3316 }
3317 
3318 void anv_DestroyDevice(
3319     VkDevice                                    _device,
3320     const VkAllocationCallbacks*                pAllocator)
3321 {
3322    ANV_FROM_HANDLE(anv_device, device, _device);
3323 
3324    if (!device)
3325       return;
3326 
3327    anv_device_finish_blorp(device);
3328 
3329    anv_device_finish_rt_shaders(device);
3330 
3331    anv_pipeline_cache_finish(&device->default_pipeline_cache);
3332 
3333 #ifdef HAVE_VALGRIND
3334    /* We only need to free these to prevent valgrind errors.  The backing
3335     * BO will go away in a couple of lines so we don't actually leak.
3336     */
3337    if (device->info.ver >= 8)
3338       anv_state_reserved_pool_finish(&device->custom_border_colors);
3339    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3340    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3341 #endif
3342 
3343    for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3344       if (device->rt_scratch_bos[i] != NULL)
3345          anv_device_release_bo(device, device->rt_scratch_bos[i]);
3346    }
3347 
3348    anv_scratch_pool_finish(device, &device->scratch_pool);
3349 
3350    anv_device_release_bo(device, device->workaround_bo);
3351    anv_device_release_bo(device, device->trivial_batch_bo);
3352 
3353    if (device->info.has_aux_map) {
3354       intel_aux_map_finish(device->aux_map_ctx);
3355       device->aux_map_ctx = NULL;
3356    }
3357 
3358    if (device->physical->use_softpin)
3359       anv_state_pool_finish(&device->binding_table_pool);
3360    anv_state_pool_finish(&device->surface_state_pool);
3361    anv_state_pool_finish(&device->instruction_state_pool);
3362    anv_state_pool_finish(&device->dynamic_state_pool);
3363    anv_state_pool_finish(&device->general_state_pool);
3364 
3365    anv_bo_pool_finish(&device->batch_bo_pool);
3366 
3367    anv_bo_cache_finish(&device->bo_cache);
3368 
3369    if (device->physical->use_softpin) {
3370       util_vma_heap_finish(&device->vma_hi);
3371       util_vma_heap_finish(&device->vma_cva);
3372       util_vma_heap_finish(&device->vma_lo);
3373    }
3374 
3375    pthread_cond_destroy(&device->queue_submit);
3376    pthread_mutex_destroy(&device->mutex);
3377 
3378    for (uint32_t i = 0; i < device->queue_count; i++)
3379       anv_queue_finish(&device->queues[i]);
3380    vk_free(&device->vk.alloc, device->queues);
3381 
3382    anv_gem_destroy_context(device, device->context_id);
3383 
3384    if (INTEL_DEBUG(DEBUG_BATCH))
3385       intel_batch_decode_ctx_finish(&device->decoder_ctx);
3386 
3387    close(device->fd);
3388 
3389    vk_device_finish(&device->vk);
3390    vk_free(&device->vk.alloc, device);
3391 }
3392 
3393 VkResult anv_EnumerateInstanceLayerProperties(
3394     uint32_t*                                   pPropertyCount,
3395     VkLayerProperties*                          pProperties)
3396 {
3397    if (pProperties == NULL) {
3398       *pPropertyCount = 0;
3399       return VK_SUCCESS;
3400    }
3401 
3402    /* None supported at this time */
3403    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3404 }
3405 
3406 void
3407 _anv_device_report_lost(struct anv_device *device)
3408 {
3409    assert(p_atomic_read(&device->_lost) > 0);
3410 
3411    device->lost_reported = true;
3412 
3413    for (uint32_t i = 0; i < device->queue_count; i++) {
3414       struct anv_queue *queue = &device->queues[i];
3415       if (queue->lost) {
3416          __vk_errorf(queue, VK_ERROR_DEVICE_LOST,
3417                      queue->error_file, queue->error_line,
3418                      "%s", queue->error_msg);
3419       }
3420    }
3421 }
3422 
3423 VkResult
3424 _anv_device_set_lost(struct anv_device *device,
3425                      const char *file, int line,
3426                      const char *msg, ...)
3427 {
3428    VkResult err;
3429    va_list ap;
3430 
3431    if (p_atomic_read(&device->_lost) > 0)
3432       return VK_ERROR_DEVICE_LOST;
3433 
3434    p_atomic_inc(&device->_lost);
3435    device->lost_reported = true;
3436 
3437    va_start(ap, msg);
3438    err = __vk_errorv(device, VK_ERROR_DEVICE_LOST, file, line, msg, ap);
3439    va_end(ap);
3440 
3441    if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
3442       abort();
3443 
3444    return err;
3445 }
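/* Debugging aid: running with ANV_ABORT_ON_DEVICE_LOSS=true turns the
 * device-loss report above into an abort(), so a GPU hang can be caught in a
 * debugger or captured as a core dump at the point of failure, e.g.:
 *
 *    ANV_ABORT_ON_DEVICE_LOSS=true ./my_vulkan_app
 */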
3446 
3447 VkResult
3448 _anv_queue_set_lost(struct anv_queue *queue,
3449                      const char *file, int line,
3450                      const char *msg, ...)
3451 {
3452    va_list ap;
3453 
3454    if (queue->lost)
3455       return VK_ERROR_DEVICE_LOST;
3456 
3457    queue->lost = true;
3458 
3459    queue->error_file = file;
3460    queue->error_line = line;
3461    va_start(ap, msg);
3462    vsnprintf(queue->error_msg, sizeof(queue->error_msg),
3463              msg, ap);
3464    va_end(ap);
3465 
3466    p_atomic_inc(&queue->device->_lost);
3467 
3468    if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
3469       abort();
3470 
3471    return VK_ERROR_DEVICE_LOST;
3472 }
3473 
3474 VkResult
3475 anv_device_query_status(struct anv_device *device)
3476 {
3477    /* This isn't likely as most of the callers of this function already check
3478     * for it.  However, it doesn't hurt to check and it potentially lets us
3479     * avoid an ioctl.
3480     */
3481    if (anv_device_is_lost(device))
3482       return VK_ERROR_DEVICE_LOST;
3483 
3484    uint32_t active, pending;
3485    int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
3486                                              &active, &pending);
3487    if (ret == -1) {
3488       /* We don't know the real error. */
3489       return anv_device_set_lost(device, "get_reset_stats failed: %m");
3490    }
3491 
3492    if (active) {
3493       return anv_device_set_lost(device, "GPU hung on one of our command buffers");
3494    } else if (pending) {
3495       return anv_device_set_lost(device, "GPU hung with commands in-flight");
3496    }
3497 
3498    return VK_SUCCESS;
3499 }
3500 
3501 VkResult
3502 anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
3503 {
3504    /* Note:  This only returns whether or not the BO is in use by an i915 GPU.
3505     * Other usages of the BO (such as on different hardware) will not be
3506     * flagged as "busy" by this ioctl.  Use with care.
3507     */
3508    int ret = anv_gem_busy(device, bo->gem_handle);
3509    if (ret == 1) {
3510       return VK_NOT_READY;
3511    } else if (ret == -1) {
3512       /* We don't know the real error. */
3513       return anv_device_set_lost(device, "gem wait failed: %m");
3514    }
3515 
3516    /* Query for device status after the busy call.  If the BO we're checking
3517     * got caught in a GPU hang we don't want to return VK_SUCCESS to the
3518     * client because it clearly doesn't have valid data.  Yes, this most
3519     * likely means an ioctl, but we just did an ioctl to query the busy status
3520     * so it's no great loss.
3521     */
3522    return anv_device_query_status(device);
3523 }
3524 
3525 VkResult
3526 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3527                 int64_t timeout)
3528 {
3529    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3530    if (ret == -1 && errno == ETIME) {
3531       return VK_TIMEOUT;
3532    } else if (ret == -1) {
3533       /* We don't know the real error. */
3534       return anv_device_set_lost(device, "gem wait failed: %m");
3535    }
3536 
3537    /* Query for device status after the wait.  If the BO we're waiting on got
3538     * caught in a GPU hang we don't want to return VK_SUCCESS to the client
3539     * because it clearly doesn't have valid data.  Yes, this most likely means
3540     * an ioctl, but we just did an ioctl to wait so it's no great loss.
3541     */
3542    return anv_device_query_status(device);
3543 }
3544 
3545 uint64_t
3546 anv_vma_alloc(struct anv_device *device,
3547               uint64_t size, uint64_t align,
3548               enum anv_bo_alloc_flags alloc_flags,
3549               uint64_t client_address)
3550 {
3551    pthread_mutex_lock(&device->vma_mutex);
3552 
3553    uint64_t addr = 0;
3554 
3555    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3556       if (client_address) {
3557          if (util_vma_heap_alloc_addr(&device->vma_cva,
3558                                       client_address, size)) {
3559             addr = client_address;
3560          }
3561       } else {
3562          addr = util_vma_heap_alloc(&device->vma_cva, size, align);
3563       }
3564       /* We don't want to fall back to other heaps */
3565       goto done;
3566    }
3567 
3568    assert(client_address == 0);
3569 
3570    if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3571       addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3572 
3573    if (addr == 0)
3574       addr = util_vma_heap_alloc(&device->vma_lo, size, align);
3575 
3576 done:
3577    pthread_mutex_unlock(&device->vma_mutex);
3578 
3579    assert(addr == intel_48b_address(addr));
3580    return intel_canonical_address(addr);
3581 }
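/* For reference, a "canonical" address sign-extends bit 47 into the upper
 * bits. A minimal sketch of the conversion (this mirrors what an
 * intel_canonical_address() helper would do):
 *
 *    static inline uint64_t canonical_48b(uint64_t addr)
 *    {
 *       return (uint64_t)(((int64_t)addr << 16) >> 16);
 *    }
 *
 * e.g. 0x0000800000000000 maps to 0xffff800000000000, while addresses with
 * bit 47 clear are returned unchanged.
 */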
3582 
3583 void
3584 anv_vma_free(struct anv_device *device,
3585              uint64_t address, uint64_t size)
3586 {
3587    const uint64_t addr_48b = intel_48b_address(address);
3588 
3589    pthread_mutex_lock(&device->vma_mutex);
3590 
3591    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3592        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3593       util_vma_heap_free(&device->vma_lo, addr_48b, size);
3594    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3595               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3596       util_vma_heap_free(&device->vma_cva, addr_48b, size);
3597    } else {
3598       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3599       util_vma_heap_free(&device->vma_hi, addr_48b, size);
3600    }
3601 
3602    pthread_mutex_unlock(&device->vma_mutex);
3603 }
3604 
3605 VkResult anv_AllocateMemory(
3606     VkDevice                                    _device,
3607     const VkMemoryAllocateInfo*                 pAllocateInfo,
3608     const VkAllocationCallbacks*                pAllocator,
3609     VkDeviceMemory*                             pMem)
3610 {
3611    ANV_FROM_HANDLE(anv_device, device, _device);
3612    struct anv_physical_device *pdevice = device->physical;
3613    struct anv_device_memory *mem;
3614    VkResult result = VK_SUCCESS;
3615 
3616    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3617 
3618    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3619    assert(pAllocateInfo->allocationSize > 0);
3620 
3621    VkDeviceSize aligned_alloc_size =
3622       align_u64(pAllocateInfo->allocationSize, 4096);
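   /* E.g. a 5000-byte allocation request is padded to 8192 bytes here. */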
3623 
3624    if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3625       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3626 
3627    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3628    struct anv_memory_type *mem_type =
3629       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3630    assert(mem_type->heapIndex < pdevice->memory.heap_count);
3631    struct anv_memory_heap *mem_heap =
3632       &pdevice->memory.heaps[mem_type->heapIndex];
3633 
3634    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3635    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3636       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3637 
3638    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3639                          VK_OBJECT_TYPE_DEVICE_MEMORY);
3640    if (mem == NULL)
3641       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3642 
3643    mem->type = mem_type;
3644    mem->map = NULL;
3645    mem->map_size = 0;
3646    mem->ahw = NULL;
3647    mem->host_ptr = NULL;
3648 
3649    enum anv_bo_alloc_flags alloc_flags = 0;
3650 
3651    const VkExportMemoryAllocateInfo *export_info = NULL;
3652    const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3653    const VkImportMemoryFdInfoKHR *fd_info = NULL;
3654    const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3655    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3656    VkMemoryAllocateFlags vk_flags = 0;
3657    uint64_t client_address = 0;
3658 
3659    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3660       switch (ext->sType) {
3661       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3662          export_info = (void *)ext;
3663          break;
3664 
3665       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3666          ahw_import_info = (void *)ext;
3667          break;
3668 
3669       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3670          fd_info = (void *)ext;
3671          break;
3672 
3673       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3674          host_ptr_info = (void *)ext;
3675          break;
3676 
3677       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3678          const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3679          vk_flags = flags_info->flags;
3680          break;
3681       }
3682 
3683       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3684          dedicated_info = (void *)ext;
3685          break;
3686 
3687       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR: {
3688          const VkMemoryOpaqueCaptureAddressAllocateInfoKHR *addr_info =
3689             (const VkMemoryOpaqueCaptureAddressAllocateInfoKHR *)ext;
3690          client_address = addr_info->opaqueCaptureAddress;
3691          break;
3692       }
3693 
3694       default:
3695          anv_debug_ignored_stype(ext->sType);
3696          break;
3697       }
3698    }

   /* By default, we want all VkDeviceMemory objects to support CCS */
   if (device->physical->has_implicit_ccs)
      alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;

   if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR)
      alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;

   if ((export_info && export_info->handleTypes) ||
       (fd_info && fd_info->handleType) ||
       (host_ptr_info && host_ptr_info->handleType)) {
      /* Anything imported or exported is EXTERNAL */
      alloc_flags |= ANV_BO_ALLOC_EXTERNAL;

      /* We can't have implicit CCS on external memory with an AUX-table.
       * Doing so would require us to sync the aux tables across processes,
       * which is impractical.
       */
      if (device->info.has_aux_map)
         alloc_flags &= ~ANV_BO_ALLOC_IMPLICIT_CCS;
   }

   /* Check if we need to support Android HW buffer export. If so,
    * create an AHardwareBuffer and import memory from it.
    */
   bool android_export = false;
   if (export_info && export_info->handleTypes &
       VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
      android_export = true;

   if (ahw_import_info) {
      result = anv_import_ahw_memory(_device, mem, ahw_import_info);
      if (result != VK_SUCCESS)
         goto fail;

      goto success;
   } else if (android_export) {
      result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
      if (result != VK_SUCCESS)
         goto fail;

      goto success;
   }

   /* The Vulkan spec permits handleType to be 0, in which case the struct is
    * ignored.
    */
   if (fd_info && fd_info->handleType) {
      /* At the moment, we support only the below handle types. */
      assert(fd_info->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
                                    client_address, &mem->bo);
      if (result != VK_SUCCESS)
         goto fail;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size.  This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
       * in the trusted client.  The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (mem->bo->size < aligned_alloc_size) {
         result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "aligned allocationSize too large for "
                            "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
                            "%"PRIu64"B > %"PRIu64"B",
                            aligned_alloc_size, mem->bo->size);
         anv_device_release_bo(device, mem->bo);
         goto fail;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
      goto success;
   }

   if (host_ptr_info && host_ptr_info->handleType) {
      if (host_ptr_info->handleType ==
          VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
         result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         goto fail;
      }

      assert(host_ptr_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);

      result = anv_device_import_bo_from_host_ptr(device,
                                                  host_ptr_info->pHostPointer,
                                                  pAllocateInfo->allocationSize,
                                                  alloc_flags,
                                                  client_address,
                                                  &mem->bo);
      if (result != VK_SUCCESS)
         goto fail;

      mem->host_ptr = host_ptr_info->pHostPointer;
      goto success;
   }

   /* Set the ALLOC_LOCAL_MEM flag if the requested memory type is
    * device-local, so the BO is allocated from device-local memory.
    */
   if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
      alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM;

   /* Regular allocate (not importing memory). */

   result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
                                alloc_flags, client_address, &mem->bo);
   if (result != VK_SUCCESS)
      goto fail;

   if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
      ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);

      /* Some legacy (non-modifiers) consumers need the tiling to be set on
       * the BO.  In this case, we have a dedicated allocation.
       */
      if (image->vk.wsi_legacy_scanout) {
         const uint32_t i915_tiling =
            isl_tiling_to_i915_tiling(image->planes[0].primary_surface.isl.tiling);
         int ret = anv_gem_set_tiling(device, mem->bo->gem_handle,
                                      image->planes[0].primary_surface.isl.row_pitch_B,
                                      i915_tiling);
         if (ret) {
            anv_device_release_bo(device, mem->bo);
            result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                               "failed to set BO tiling: %m");
            goto fail;
         }
      }
   }

 success:
   mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
   if (mem_heap_used > mem_heap->size) {
      p_atomic_add(&mem_heap->used, -mem->bo->size);
      anv_device_release_bo(device, mem->bo);
      result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "Out of heap memory");
      goto fail;
   }

   pthread_mutex_lock(&device->mutex);
   list_addtail(&mem->link, &device->memory_objects);
   pthread_mutex_unlock(&device->mutex);

   *pMem = anv_device_memory_to_handle(mem);

   return VK_SUCCESS;

 fail:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}
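
/* For reference, a minimal client-side sketch (not driver code) of the pNext
 * chains handled above: requesting a device address at allocation time and
 * making the memory exportable.  The `device` handle and `memory_type_index`
 * are assumed to come from the usual vkCreateDevice /
 * vkGetPhysicalDeviceMemoryProperties flow.
 *
 *    VkExportMemoryAllocateInfo export_info = {
 *       .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
 *       .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 *    VkMemoryAllocateFlagsInfo flags_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
 *       .pNext = &export_info,
 *       .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &flags_info,
 *       .allocationSize = 4096,
 *       .memoryTypeIndex = memory_type_index,
 *    };
 *    VkDeviceMemory memory;
 *    VkResult res = vkAllocateMemory(device, &alloc_info, NULL, &memory);
 *
 * The DEVICE_ADDRESS bit is what routes the BO into the client-visible VMA
 * heap via ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS above.
 */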

VkResult anv_GetMemoryFdKHR(
    VkDevice                                    device_h,
    const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
    int*                                        pFd)
{
   ANV_FROM_HANDLE(anv_device, dev, device_h);
   ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return anv_device_export_bo(dev, mem->bo, pFd);
}
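
/* A sketch of the export/import round trip this entry point enables (client
 * code, error handling elided; `memory` is assumed to have been allocated
 * with a matching VkExportMemoryAllocateInfo as in the sketch above):
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *    };
 *    int fd;
 *    vkGetMemoryFdKHR(device, &get_fd, &fd);
 *
 *    VkImportMemoryFdInfoKHR import = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
 *       .fd = fd,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &import,
 *       .allocationSize = size,
 *       .memoryTypeIndex = memory_type_index,
 *    };
 *    vkAllocateMemory(other_device, &alloc_info, NULL, &imported);
 *
 * On successful import the fd is owned by the implementation (see the spec
 * quote in anv_AllocateMemory above), so the client must not close it.
 */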

VkResult anv_GetMemoryFdPropertiesKHR(
    VkDevice                                    _device,
    VkExternalMemoryHandleTypeFlagBits          handleType,
    int                                         fd,
    VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* dma-buf can be imported as any memory type */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->physical->memory.type_count) - 1;
      return VK_SUCCESS;

   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

VkResult anv_GetMemoryHostPointerPropertiesEXT(
   VkDevice                                    _device,
   VkExternalMemoryHandleTypeFlagBits          handleType,
   const void*                                 pHostPointer,
   VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   assert(pMemoryHostPointerProperties->sType ==
          VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
      /* Host memory can be imported as any memory type. */
      pMemoryHostPointerProperties->memoryTypeBits =
         (1ull << device->physical->memory.type_count) - 1;

      return VK_SUCCESS;

   default:
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }
}

void anv_FreeMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _mem,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   pthread_mutex_lock(&device->mutex);
   list_del(&mem->link);
   pthread_mutex_unlock(&device->mutex);

   if (mem->map)
      anv_UnmapMemory(_device, _mem);

   p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
                -mem->bo->size);

   anv_device_release_bo(device, mem->bo);

#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
   if (mem->ahw)
      AHardwareBuffer_release(mem->ahw);
#endif

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult anv_MapMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _memory,
    VkDeviceSize                                offset,
    VkDeviceSize                                size,
    VkMemoryMapFlags                            flags,
    void**                                      ppData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (mem->host_ptr) {
      *ppData = mem->host_ptr + offset;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */
   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
    * takes a VkDeviceMemory pointer, it seems like only one map of the memory
    * at a time is valid. We could just mmap up front and return an offset
    * pointer here, but that may exhaust virtual memory on 32 bit
    * userspace. */

   uint32_t gem_flags = 0;

   if (!device->info.has_llc &&
       (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
      gem_flags |= I915_MMAP_WC;

   /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
   uint64_t map_offset;
   if (!device->physical->has_mmap_offset)
      map_offset = offset & ~4095ull;
   else
      map_offset = 0;
   assert(offset >= map_offset);
   uint64_t map_size = (offset + size) - map_offset;

   /* Let's map whole pages */
   map_size = align_u64(map_size, 4096);
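
   /* Worked example of the rounding above (legacy non-mmap-offset path):
    * with offset = 5000 and size = 100, map_offset rounds down to 4096,
    * map_size = (5000 + 100) - 4096 = 1004, which aligns up to 4096.  The
    * caller's pointer is then map + map_delta = map + 904.
    */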

   void *map = anv_gem_mmap(device, mem->bo->gem_handle,
                            map_offset, map_size, gem_flags);
   if (map == MAP_FAILED)
      return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);

   mem->map = map;
   mem->map_size = map_size;
   mem->map_delta = (offset - map_offset);

   *ppData = mem->map + mem->map_delta;

   return VK_SUCCESS;
}

void anv_UnmapMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _memory)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);

   if (mem == NULL || mem->host_ptr)
      return;

   anv_gem_munmap(device, mem->map, mem->map_size);

   mem->map = NULL;
   mem->map_size = 0;
   mem->map_delta = 0;
}

static void
clflush_mapped_ranges(struct anv_device         *device,
                      uint32_t                   count,
                      const VkMappedMemoryRange *ranges)
{
   for (uint32_t i = 0; i < count; i++) {
      ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
      uint64_t map_offset = ranges[i].offset + mem->map_delta;
      if (map_offset >= mem->map_size)
         continue;

      if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
         continue;

      intel_clflush_range(mem->map + map_offset,
                          MIN2(ranges[i].size, mem->map_size - map_offset));
   }
}

VkResult anv_FlushMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->memory.need_clflush)
      return VK_SUCCESS;

   /* Make sure the writes we're flushing have landed. */
   __builtin_ia32_mfence();

   clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);

   return VK_SUCCESS;
}

VkResult anv_InvalidateMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->memory.need_clflush)
      return VK_SUCCESS;

   clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);

   /* Make sure no reads get moved up above the invalidate. */
   __builtin_ia32_mfence();

   return VK_SUCCESS;
}
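
/* Client-side sketch of when the two entry points above matter: on platforms
 * where need_clflush is set, writes through a HOST_VISIBLE but
 * non-HOST_COHERENT mapping must be flushed explicitly before the GPU can
 * see them.  Error handling elided; `memory` and `src` are assumed to exist.
 *
 *    void *data;
 *    vkMapMemory(device, memory, 0, 4096, 0, &data);
 *    memcpy(data, src, 4096);
 *
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(device, 1, &range);       // CPU -> GPU
 *    // ... and after GPU writes plus a fence wait:
 *    vkInvalidateMappedMemoryRanges(device, 1, &range);  // GPU -> CPU
 */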

void anv_GetDeviceMemoryCommitment(
    VkDevice                                    device,
    VkDeviceMemory                              memory,
    VkDeviceSize*                               pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

static void
anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
{
   ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
   ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);

   assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);

   if (mem) {
      assert(pBindInfo->memoryOffset < mem->bo->size);
      assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->size);
      buffer->address = (struct anv_address) {
         .bo = mem->bo,
         .offset = pBindInfo->memoryOffset,
      };
   } else {
      buffer->address = ANV_NULL_ADDRESS;
   }
}

VkResult anv_BindBufferMemory2(
    VkDevice                                    device,
    uint32_t                                    bindInfoCount,
    const VkBindBufferMemoryInfo*               pBindInfos)
{
   for (uint32_t i = 0; i < bindInfoCount; i++)
      anv_bind_buffer_memory(&pBindInfos[i]);

   return VK_SUCCESS;
}

VkResult anv_QueueBindSparse(
    VkQueue                                     _queue,
    uint32_t                                    bindInfoCount,
    const VkBindSparseInfo*                     pBindInfo,
    VkFence                                     fence)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   if (anv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
}

// Event functions

VkResult anv_CreateEvent(
    VkDevice                                    _device,
    const VkEventCreateInfo*                    pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkEvent*                                    pEvent)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_event *event;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);

   event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
                           VK_OBJECT_TYPE_EVENT);
   if (event == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
                                       sizeof(uint64_t), 8);
   *(uint64_t *)event->state.map = VK_EVENT_RESET;

   *pEvent = anv_event_to_handle(event);

   return VK_SUCCESS;
}

void anv_DestroyEvent(
    VkDevice                                    _device,
    VkEvent                                     _event,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   if (!event)
      return;

   anv_state_pool_free(&device->dynamic_state_pool, event->state);

   vk_object_free(&device->vk, pAllocator, event);
}

VkResult anv_GetEventStatus(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   if (anv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   return *(uint64_t *)event->state.map;
}

VkResult anv_SetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_event, event, _event);

   *(uint64_t *)event->state.map = VK_EVENT_SET;

   return VK_SUCCESS;
}

VkResult anv_ResetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_event, event, _event);

   *(uint64_t *)event->state.map = VK_EVENT_RESET;

   return VK_SUCCESS;
}
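
/* Events boil down to a single uint64 slot in the dynamic state pool holding
 * VK_EVENT_SET or VK_EVENT_RESET, so host-side signaling is a store and
 * polling is a load.  A minimal host-only usage sketch:
 *
 *    VkEventCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
 *    };
 *    VkEvent event;
 *    vkCreateEvent(device, &info, NULL, &event);
 *
 *    vkSetEvent(device, event);
 *    assert(vkGetEventStatus(device, event) == VK_EVENT_SET);
 *    vkResetEvent(device, event);
 *    vkDestroyEvent(device, event, NULL);
 */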

// Buffer functions

static void
anv_get_buffer_memory_requirements(struct anv_device *device,
                                   VkDeviceSize size,
                                   VkBufferUsageFlags usage,
                                   VkMemoryRequirements2* pMemoryRequirements)
{
   /* The Vulkan spec (git aaed022) says:
    *
    *    memoryTypeBits is a bitfield and contains one bit set for every
    *    supported memory type for the resource. The bit `1<<i` is set if and
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
    */
   uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
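   /* e.g. with type_count == 3 this yields 0b111: every type is allowed. */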

   /* Base alignment requirement of a cache line */
   uint32_t alignment = 16;

   if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
      alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);

   pMemoryRequirements->memoryRequirements.size = size;
   pMemoryRequirements->memoryRequirements.alignment = alignment;

   /* Storage and uniform buffers should have their size aligned to
    * 32 bits to avoid bounds checks when the last DWord is not complete.
    * This ensures that no internal padding is needed for 16-bit types.
    */
   if (device->robust_buffer_access &&
       (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
        usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
      pMemoryRequirements->memoryRequirements.size = align_u64(size, 4);

   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;

   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *requirements = (void *)ext;
         requirements->prefersDedicatedAllocation = false;
         requirements->requiresDedicatedAllocation = false;
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

void anv_GetBufferMemoryRequirements2(
    VkDevice                                    _device,
    const VkBufferMemoryRequirementsInfo2*      pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);

   anv_get_buffer_memory_requirements(device,
                                      buffer->size,
                                      buffer->usage,
                                      pMemoryRequirements);
}

void anv_GetDeviceBufferMemoryRequirementsKHR(
    VkDevice                                    _device,
    const VkDeviceBufferMemoryRequirementsKHR*  pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   anv_get_buffer_memory_requirements(device,
                                      pInfo->pCreateInfo->size,
                                      pInfo->pCreateInfo->usage,
                                      pMemoryRequirements);
}

VkResult anv_CreateBuffer(
    VkDevice                                    _device,
    const VkBufferCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkBuffer*                                   pBuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_buffer *buffer;

   /* Don't allow creating buffers bigger than our address space.  The real
    * issue here is that we may align up the buffer size and we don't want
    * doing so to cause roll-over.  However, no one has any business
    * allocating a buffer larger than our GTT size.
    */
   if (pCreateInfo->size > device->physical->gtt_size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
                            VK_OBJECT_TYPE_BUFFER);
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->create_flags = pCreateInfo->flags;
   buffer->size = pCreateInfo->size;
   buffer->usage = pCreateInfo->usage;
   buffer->address = ANV_NULL_ADDRESS;

   *pBuffer = anv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}
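
/* A minimal sketch of the client-side buffer lifecycle these entry points
 * implement (error handling and memory-type selection elided; `memory` is
 * assumed to have been allocated from a type permitted by memoryTypeBits):
 *
 *    VkBufferCreateInfo buf_info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
 *       .size = 65536,
 *       .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
 *       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
 *    };
 *    VkBuffer buf;
 *    vkCreateBuffer(device, &buf_info, NULL, &buf);
 *
 *    VkBufferMemoryRequirementsInfo2 req_info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
 *       .buffer = buf,
 *    };
 *    VkMemoryRequirements2 reqs = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
 *    };
 *    vkGetBufferMemoryRequirements2(device, &req_info, &reqs);
 *
 *    VkBindBufferMemoryInfo bind = {
 *       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
 *       .buffer = buf,
 *       .memory = memory,
 *       .memoryOffset = 0,
 *    };
 *    vkBindBufferMemory2(device, 1, &bind);
 *
 * Binding just records {bo, offset} in buffer->address above; there is no
 * separate GPU-side operation.
 */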

void anv_DestroyBuffer(
    VkDevice                                    _device,
    VkBuffer                                    _buffer,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_object_free(&device->vk, pAllocator, buffer);
}

VkDeviceAddress anv_GetBufferDeviceAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfoKHR*         pInfo)
{
   ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);

   assert(!anv_address_is_null(buffer->address));
   assert(buffer->address.bo->flags & EXEC_OBJECT_PINNED);

   return anv_address_physical(buffer->address);
}

uint64_t anv_GetBufferOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfoKHR*         pInfo)
{
   return 0;
}

uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo)
{
   ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);

   assert(memory->bo->flags & EXEC_OBJECT_PINNED);
   assert(memory->bo->has_client_visible_address);

   return intel_48b_address(memory->bo->offset);
}

void
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
                              enum isl_format format,
                              isl_surf_usage_flags_t usage,
                              struct anv_address address,
                              uint32_t range, uint32_t stride)
{
   isl_buffer_fill_state(&device->isl_dev, state.map,
                         .address = anv_address_physical(address),
                         .mocs = isl_mocs(&device->isl_dev, usage,
                                          address.bo && address.bo->is_external),
                         .size_B = range,
                         .format = format,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = stride);
}

void anv_DestroySampler(
    VkDevice                                    _device,
    VkSampler                                   _sampler,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->bindless_state.map) {
      anv_state_pool_free(&device->dynamic_state_pool,
                          sampler->bindless_state);
   }

   if (sampler->custom_border_color.map) {
      anv_state_reserved_pool_free(&device->custom_border_colors,
                                   sampler->custom_border_color);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}

VkResult anv_CreateFramebuffer(
    VkDevice                                    _device,
    const VkFramebufferCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkFramebuffer*                              pFramebuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer);

   /* VK_KHR_imageless_framebuffer extension says:
    *
    *    If flags includes VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR,
    *    parameter pAttachments is ignored.
    */
   if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR))
      size += sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;

   framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
                                 VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR)) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         ANV_FROM_HANDLE(anv_image_view, iview, pCreateInfo->pAttachments[i]);
         framebuffer->attachments[i] = iview;
      }
      framebuffer->attachment_count = pCreateInfo->attachmentCount;
   }

   *pFramebuffer = anv_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;
}

void anv_DestroyFramebuffer(
    VkDevice                                    _device,
    VkFramebuffer                               _fb,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
   VkPhysicalDevice                             physicalDevice,
   uint32_t                                     *pTimeDomainCount,
   VkTimeDomainEXT                              *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append(&out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult anv_GetCalibratedTimestampsEXT(
   VkDevice                                     _device,
   uint32_t                                     timestampCount,
   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
   uint64_t                                     *pTimestamps,
   uint64_t                                     *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info.timestamp_frequency;
   int ret;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         ret = anv_gem_reg_read(device->fd, TIMESTAMP | I915_REG_READ_8B_WA,
                                &pTimestamps[d]);

         if (ret != 0) {
            return anv_device_set_lost(device, "Failed to read the TIMESTAMP "
                                               "register: %m");
         }
         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                          s                 e
    *             w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw       -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                          g
    *             0         1         2         3
    *   GPU       -----_____-----_____-----_____-----_____
    *
    *                                          m
    *                           x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic               -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval               <----------------->
    *   Deviation     <-------------------------->
    *
    *      s = read(raw)       2
    *      g = read(GPU)       1
    *      m = read(monotonic) 2
    *      e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
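
/* Concrete numbers for the formula above: assuming (hypothetically) a 12 MHz
 * GPU timestamp, device_period = DIV_ROUND_UP(1000000000, 12000000) = 84ns,
 * so max_clock_period = 84.  If the two CLOCK_MONOTONIC_RAW reads bracketing
 * the loop land 1200ns apart, the reported *pMaxDeviation is
 * (1200 + 1) + 84 = 1285ns.
 */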

void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      anv_debug_ignored_stype(ext->sType);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it is
    *         linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}

VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE(out, pFragmentShadingRates, pFragmentShadingRateCount);

#define append_rate(_samples, _width, _height)                          \
   do {                                                                 \
      vk_outarray_append(&out, __r) {                                   \
         __r->sampleCounts = _samples;                                  \
         __r->fragmentSize = (VkExtent2D) {                             \
            .width = _width,                                            \
            .height = _height,                                          \
         };                                                             \
      }                                                                 \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   for (uint32_t x = 4; x >= 1; x /= 2) {
      for (uint32_t y = 4; y >= 1; y /= 2) {
         /* For size {1, 1}, the sample count must be ~0 */
         if (x == 1 && y == 1)
            append_rate(~0, x, y);
         else
            append_rate(sample_counts, x, y);
      }
   }

#undef append_rate

   return vk_outarray_status(&out);
}