1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include "drm-uapi/i915_drm.h"
34 
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #ifndef NDEBUG
40 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
41 #endif
42 #else
43 #define VG(x) ((void)0)
44 #endif
45 
46 #include "common/gen_clflush.h"
47 #include "common/gen_decoder.h"
48 #include "common/gen_gem.h"
49 #include "common/gen_l3_config.h"
50 #include "dev/gen_device_info.h"
51 #include "blorp/blorp.h"
52 #include "compiler/brw_compiler.h"
53 #include "util/bitset.h"
54 #include "util/macros.h"
55 #include "util/hash_table.h"
56 #include "util/list.h"
57 #include "util/sparse_array.h"
58 #include "util/u_atomic.h"
59 #include "util/u_vector.h"
60 #include "util/u_math.h"
61 #include "util/vma.h"
62 #include "util/xmlconfig.h"
63 #include "vk_alloc.h"
64 #include "vk_debug_report.h"
65 #include "vk_object.h"
66 
67 /* Pre-declarations needed for WSI entrypoints */
68 struct wl_surface;
69 struct wl_display;
70 typedef struct xcb_connection_t xcb_connection_t;
71 typedef uint32_t xcb_visualid_t;
72 typedef uint32_t xcb_window_t;
73 
74 struct anv_batch;
75 struct anv_buffer;
76 struct anv_buffer_view;
77 struct anv_image_view;
78 struct anv_instance;
79 
80 struct gen_aux_map_context;
81 struct gen_perf_config;
82 struct gen_perf_counter_pass;
83 struct gen_perf_query_result;
84 
85 #include <vulkan/vulkan.h>
86 #include <vulkan/vulkan_intel.h>
87 #include <vulkan/vk_icd.h>
88 
89 #include "anv_android.h"
90 #include "anv_entrypoints.h"
91 #include "anv_extensions.h"
92 #include "isl/isl.h"
93 
94 #include "dev/gen_debug.h"
95 #include "common/intel_log.h"
96 #include "wsi_common.h"
97 
98 #define NSEC_PER_SEC 1000000000ull
99 
100 /* anv Virtual Memory Layout
101  * =========================
102  *
103  * When the anv driver is determining the virtual graphics addresses of memory
104  * objects itself using the softpin mechanism, the following memory ranges
105  * will be used.
106  *
107  * Three special considerations to notice:
108  *
109  * (1) the dynamic state pool is located within the same 4 GiB as the low
110  * heap. This is to work around a VF cache issue described in a comment in
111  * anv_physical_device_init_heaps.
112  *
113  * (2) the binding table pool is located at lower addresses than the surface
114  * state pool, within a 4 GiB range. This allows surface state base addresses
115  * to cover both binding tables (16 bit offsets) and surface states (32 bit
116  * offsets).
117  *
118  * (3) the last 4 GiB of the address space is withheld from the high
119  * heap. Various hardware units will read past the end of an object for
120  * various reasons. This healthy margin prevents reads from wrapping around
121  * 48-bit addresses.
122  */
123 #define LOW_HEAP_MIN_ADDRESS               0x000000001000ULL /* 4 KiB */
124 #define LOW_HEAP_MAX_ADDRESS               0x0000bfffffffULL
125 #define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
126 #define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
127 #define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
128 #define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
129 #define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
130 #define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
131 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
132 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
133 #define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
134 #define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
135 #define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */
136 
137 #define LOW_HEAP_SIZE               \
138    (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
139 #define DYNAMIC_STATE_POOL_SIZE     \
140    (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
141 #define BINDING_TABLE_POOL_SIZE     \
142    (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
143 #define SURFACE_STATE_POOL_SIZE     \
144    (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
145 #define INSTRUCTION_STATE_POOL_SIZE \
146    (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
147 #define CLIENT_VISIBLE_HEAP_SIZE               \
148    (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
149 
150 /* Allowing different clear colors requires us to perform a depth resolve at
151  * the end of certain render passes. This is because while slow clears store
152  * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
153  * See the PRMs for examples describing when additional resolves would be
154  * necessary. To enable fast clears without requiring extra resolves, we set
155  * the clear value to a globally-defined one. We could allow different values
156  * if the user doesn't expect coherent data during or after a render passes
157  * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
158  * don't seem to exist yet. In almost all Vulkan applications tested thus far,
159  * 1.0f seems to be the only value used. The only application that doesn't set
160  * this value does so through the usage of an seemingly uninitialized clear
161  * value.
162  */
163 #define ANV_HZ_FC_VAL 1.0f
164 
165 #define MAX_VBS         28
166 #define MAX_XFB_BUFFERS  4
167 #define MAX_XFB_STREAMS  4
168 #define MAX_SETS         8
169 #define MAX_RTS          8
170 #define MAX_VIEWPORTS   16
171 #define MAX_SCISSORS    16
172 #define MAX_PUSH_CONSTANTS_SIZE 128
173 #define MAX_DYNAMIC_BUFFERS 16
174 #define MAX_IMAGES 64
175 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
176 #define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
177 #define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
178 /* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
179  * use 64 here to avoid cache issues. This could most likely bring it back to
180  * 32 if we had different virtual addresses for the different views on a given
181  * GEM object.
182  */
183 #define ANV_UBO_ALIGNMENT 64
184 #define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
185 #define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
186 
187 /* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
188  *
189  *    "The surface state model is used when a Binding Table Index (specified
190  *    in the message descriptor) of less than 240 is specified. In this model,
191  *    the Binding Table Index is used to index into the binding table, and the
192  *    binding table entry contains a pointer to the SURFACE_STATE."
193  *
194  * Binding table values above 240 are used for various things in the hardware
195  * such as stateless, stateless with incoherent cache, SLM, and bindless.
196  */
197 #define MAX_BINDING_TABLE_SIZE 240
198 
199 /* The kernel relocation API has a limitation of a 32-bit delta value
200  * applied to the address before it is written which, in spite of it being
201  * unsigned, is treated as signed .  Because of the way that this maps to
202  * the Vulkan API, we cannot handle an offset into a buffer that does not
203  * fit into a signed 32 bits.  The only mechanism we have for dealing with
204  * this at the moment is to limit all VkDeviceMemory objects to a maximum
205  * of 2GB each.  The Vulkan spec allows us to do this:
206  *
207  *    "Some platforms may have a limit on the maximum size of a single
208  *    allocation. For example, certain systems may fail to create
209  *    allocations with a size greater than or equal to 4GB. Such a limit is
210  *    implementation-dependent, and if such a failure occurs then the error
211  *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
212  *
213  * We don't use vk_error here because it's not an error so much as an
214  * indication to the application that the allocation is too large.
215  */
216 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
217 
218 #define ANV_SVGS_VB_INDEX    MAX_VBS
219 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
220 
221 /* We reserve this MI ALU register for the purpose of handling predication.
222  * Other code which uses the MI ALU should leave it alone.
223  */
224 #define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */
225 
226 /* We reserve this MI ALU register to pass around an offset computed from
227  * VkPerformanceQuerySubmitInfoKHR::counterPassIndex VK_KHR_performance_query.
228  * Other code which uses the MI ALU should leave it alone.
229  */
230 #define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */
231 
232 /* For gen12 we set the streamout buffers using 4 separate commands
233  * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout
234  * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
235  * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use the
236  * 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
237  * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
238  * 3DSTATE_SO_BUFFER_INDEX_0.
239  */
240 #define SO_BUFFER_INDEX_0_CMD 0x60
241 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
242 
243 static inline uint32_t
align_down_npot_u32(uint32_t v,uint32_t a)244 align_down_npot_u32(uint32_t v, uint32_t a)
245 {
246    return v - (v % a);
247 }
248 
249 static inline uint32_t
align_down_u32(uint32_t v,uint32_t a)250 align_down_u32(uint32_t v, uint32_t a)
251 {
252    assert(a != 0 && a == (a & -a));
253    return v & ~(a - 1);
254 }
255 
256 static inline uint32_t
align_u32(uint32_t v,uint32_t a)257 align_u32(uint32_t v, uint32_t a)
258 {
259    assert(a != 0 && a == (a & -a));
260    return align_down_u32(v + a - 1, a);
261 }
262 
263 static inline uint64_t
align_down_u64(uint64_t v,uint64_t a)264 align_down_u64(uint64_t v, uint64_t a)
265 {
266    assert(a != 0 && a == (a & -a));
267    return v & ~(a - 1);
268 }
269 
270 static inline uint64_t
align_u64(uint64_t v,uint64_t a)271 align_u64(uint64_t v, uint64_t a)
272 {
273    return align_down_u64(v + a - 1, a);
274 }
275 
276 static inline int32_t
align_i32(int32_t v,int32_t a)277 align_i32(int32_t v, int32_t a)
278 {
279    assert(a != 0 && a == (a & -a));
280    return (v + a - 1) & ~(a - 1);
281 }
282 
283 /** Alignment must be a power of 2. */
284 static inline bool
anv_is_aligned(uintmax_t n,uintmax_t a)285 anv_is_aligned(uintmax_t n, uintmax_t a)
286 {
287    assert(a == (a & -a));
288    return (n & (a - 1)) == 0;
289 }
290 
291 static inline uint32_t
anv_minify(uint32_t n,uint32_t levels)292 anv_minify(uint32_t n, uint32_t levels)
293 {
294    if (unlikely(n == 0))
295       return 0;
296    else
297       return MAX2(n >> levels, 1);
298 }
299 
300 static inline float
anv_clamp_f(float f,float min,float max)301 anv_clamp_f(float f, float min, float max)
302 {
303    assert(min < max);
304 
305    if (f > max)
306       return max;
307    else if (f < min)
308       return min;
309    else
310       return f;
311 }
312 
313 static inline bool
anv_clear_mask(uint32_t * inout_mask,uint32_t clear_mask)314 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
315 {
316    if (*inout_mask & clear_mask) {
317       *inout_mask &= ~clear_mask;
318       return true;
319    } else {
320       return false;
321    }
322 }
323 
324 static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)325 vk_to_isl_color(VkClearColorValue color)
326 {
327    return (union isl_color_value) {
328       .u32 = {
329          color.uint32[0],
330          color.uint32[1],
331          color.uint32[2],
332          color.uint32[3],
333       },
334    };
335 }
336 
anv_unpack_ptr(uintptr_t ptr,int bits,int * flags)337 static inline void *anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
338 {
339    uintptr_t mask = (1ull << bits) - 1;
340    *flags = ptr & mask;
341    return (void *) (ptr & ~mask);
342 }
343 
anv_pack_ptr(void * ptr,int bits,int flags)344 static inline uintptr_t anv_pack_ptr(void *ptr, int bits, int flags)
345 {
346    uintptr_t value = (uintptr_t) ptr;
347    uintptr_t mask = (1ull << bits) - 1;
348    return value | (mask & flags);
349 }
350 
351 #define for_each_bit(b, dword)                          \
352    for (uint32_t __dword = (dword);                     \
353         (b) = __builtin_ffs(__dword) - 1, __dword;      \
354         __dword &= ~(1 << (b)))
355 
356 #define typed_memcpy(dest, src, count) ({ \
357    STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
358    memcpy((dest), (src), (count) * sizeof(*(src))); \
359 })
360 
361 /* Mapping from anv object to VkDebugReportObjectTypeEXT. New types need
362  * to be added here in order to utilize mapping in debug/error/perf macros.
363  */
364 #define REPORT_OBJECT_TYPE(o)                                                      \
365    __builtin_choose_expr (                                                         \
366    __builtin_types_compatible_p (__typeof (o), struct anv_instance*),              \
367    VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT,                                       \
368    __builtin_choose_expr (                                                         \
369    __builtin_types_compatible_p (__typeof (o), struct anv_physical_device*),       \
370    VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT,                                \
371    __builtin_choose_expr (                                                         \
372    __builtin_types_compatible_p (__typeof (o), struct anv_device*),                \
373    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,                                         \
374    __builtin_choose_expr (                                                         \
375    __builtin_types_compatible_p (__typeof (o), const struct anv_device*),          \
376    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,                                         \
377    __builtin_choose_expr (                                                         \
378    __builtin_types_compatible_p (__typeof (o), struct anv_queue*),                 \
379    VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT,                                          \
380    __builtin_choose_expr (                                                         \
381    __builtin_types_compatible_p (__typeof (o), struct anv_semaphore*),             \
382    VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT,                                      \
383    __builtin_choose_expr (                                                         \
384    __builtin_types_compatible_p (__typeof (o), struct anv_cmd_buffer*),            \
385    VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT,                                 \
386    __builtin_choose_expr (                                                         \
387    __builtin_types_compatible_p (__typeof (o), struct anv_fence*),                 \
388    VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT,                                          \
389    __builtin_choose_expr (                                                         \
390    __builtin_types_compatible_p (__typeof (o), struct anv_device_memory*),         \
391    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT,                                  \
392    __builtin_choose_expr (                                                         \
393    __builtin_types_compatible_p (__typeof (o), struct anv_buffer*),                \
394    VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT,                                         \
395    __builtin_choose_expr (                                                         \
396    __builtin_types_compatible_p (__typeof (o), struct anv_image*),                 \
397    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,                                          \
398    __builtin_choose_expr (                                                         \
399    __builtin_types_compatible_p (__typeof (o), const struct anv_image*),           \
400    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,                                          \
401    __builtin_choose_expr (                                                         \
402    __builtin_types_compatible_p (__typeof (o), struct anv_event*),                 \
403    VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT,                                          \
404    __builtin_choose_expr (                                                         \
405    __builtin_types_compatible_p (__typeof (o), struct anv_query_pool*),            \
406    VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT,                                     \
407    __builtin_choose_expr (                                                         \
408    __builtin_types_compatible_p (__typeof (o), struct anv_buffer_view*),           \
409    VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT,                                    \
410    __builtin_choose_expr (                                                         \
411    __builtin_types_compatible_p (__typeof (o), struct anv_image_view*),            \
412    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT,                                     \
413    __builtin_choose_expr (                                                         \
414    __builtin_types_compatible_p (__typeof (o), struct anv_shader_module*),         \
415    VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT,                                  \
416    __builtin_choose_expr (                                                         \
417    __builtin_types_compatible_p (__typeof (o), struct anv_pipeline_cache*),        \
418    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT,                                 \
419    __builtin_choose_expr (                                                         \
420    __builtin_types_compatible_p (__typeof (o), struct anv_pipeline_layout*),       \
421    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT,                                \
422    __builtin_choose_expr (                                                         \
423    __builtin_types_compatible_p (__typeof (o), struct anv_render_pass*),           \
424    VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT,                                    \
425    __builtin_choose_expr (                                                         \
426    __builtin_types_compatible_p (__typeof (o), struct anv_pipeline*),              \
427    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT,                                       \
428    __builtin_choose_expr (                                                         \
429    __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_set_layout*), \
430    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT,                          \
431    __builtin_choose_expr (                                                         \
432    __builtin_types_compatible_p (__typeof (o), struct anv_sampler*),               \
433    VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT,                                        \
434    __builtin_choose_expr (                                                         \
435    __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_pool*),       \
436    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT,                                \
437    __builtin_choose_expr (                                                         \
438    __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_set*),        \
439    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT,                                 \
440    __builtin_choose_expr (                                                         \
441    __builtin_types_compatible_p (__typeof (o), struct anv_framebuffer*),           \
442    VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT,                                    \
443    __builtin_choose_expr (                                                         \
444    __builtin_types_compatible_p (__typeof (o), struct anv_cmd_pool*),              \
445    VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT,                                   \
446    __builtin_choose_expr (                                                         \
447    __builtin_types_compatible_p (__typeof (o), struct anv_surface*),               \
448    VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT,                                    \
449    __builtin_choose_expr (                                                         \
450    __builtin_types_compatible_p (__typeof (o), struct wsi_swapchain*),             \
451    VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT,                                  \
452    __builtin_choose_expr (                                                         \
453    __builtin_types_compatible_p (__typeof (o), struct vk_debug_callback*),         \
454    VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT,                      \
455    __builtin_choose_expr (                                                         \
456    __builtin_types_compatible_p (__typeof (o), void*),                             \
457    VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,                                        \
458    /* The void expression results in a compile-time error                          \
459       when assigning the result to something.  */                                  \
460    (void)0)))))))))))))))))))))))))))))))
461 
462 /* Whenever we generate an error, pass it through this function. Useful for
463  * debugging, where we can break on it. Only call at error site, not when
464  * propagating errors. Might be useful to plug in a stack trace here.
465  */
466 
467 VkResult __vk_errorv(struct anv_instance *instance, const void *object,
468                      VkDebugReportObjectTypeEXT type, VkResult error,
469                      const char *file, int line, const char *format,
470                      va_list args);
471 
472 VkResult __vk_errorf(struct anv_instance *instance, const void *object,
473                      VkDebugReportObjectTypeEXT type, VkResult error,
474                      const char *file, int line, const char *format, ...)
475    anv_printflike(7, 8);
476 
477 #ifdef DEBUG
478 #define vk_error(error) __vk_errorf(NULL, NULL,\
479                                     VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,\
480                                     error, __FILE__, __LINE__, NULL)
481 #define vk_errorfi(instance, obj, error, format, ...)\
482     __vk_errorf(instance, obj, REPORT_OBJECT_TYPE(obj), error,\
483                 __FILE__, __LINE__, format, ## __VA_ARGS__)
484 #define vk_errorf(device, obj, error, format, ...)\
485    vk_errorfi(anv_device_instance_or_null(device),\
486               obj, error, format, ## __VA_ARGS__)
487 #else
488 #define vk_error(error) error
489 #define vk_errorfi(instance, obj, error, format, ...) error
490 #define vk_errorf(device, obj, error, format, ...) error
491 #endif
492 
493 /**
494  * Warn on ignored extension structs.
495  *
496  * The Vulkan spec requires us to ignore unsupported or unknown structs in
497  * a pNext chain.  In debug mode, emitting warnings for ignored structs may
498  * help us discover structs that we should not have ignored.
499  *
500  *
501  * From the Vulkan 1.0.38 spec:
502  *
503  *    Any component of the implementation (the loader, any enabled layers,
504  *    and drivers) must skip over, without processing (other than reading the
505  *    sType and pNext members) any chained structures with sType values not
506  *    defined by extensions supported by that component.
507  */
508 #define anv_debug_ignored_stype(sType) \
509    intel_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
510 
511 void __anv_perf_warn(struct anv_device *device, const void *object,
512                      VkDebugReportObjectTypeEXT type, const char *file,
513                      int line, const char *format, ...)
514    anv_printflike(6, 7);
515 void anv_loge(const char *format, ...) anv_printflike(1, 2);
516 void anv_loge_v(const char *format, va_list va);
517 
518 /**
519  * Print a FINISHME message, including its source location.
520  */
521 #define anv_finishme(format, ...) \
522    do { \
523       static bool reported = false; \
524       if (!reported) { \
525          intel_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
526                     ##__VA_ARGS__); \
527          reported = true; \
528       } \
529    } while (0)
530 
531 /**
532  * Print a perf warning message.  Set INTEL_DEBUG=perf to see these.
533  */
534 #define anv_perf_warn(instance, obj, format, ...) \
535    do { \
536       static bool reported = false; \
537       if (!reported && unlikely(INTEL_DEBUG & DEBUG_PERF)) { \
538          __anv_perf_warn(instance, obj, REPORT_OBJECT_TYPE(obj), __FILE__, __LINE__,\
539                          format, ##__VA_ARGS__); \
540          reported = true; \
541       } \
542    } while (0)
543 
544 /* A non-fatal assert.  Useful for debugging. */
545 #ifdef DEBUG
546 #define anv_assert(x) ({ \
547    if (unlikely(!(x))) \
548       intel_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
549 })
550 #else
551 #define anv_assert(x)
552 #endif
553 
554 /* A multi-pointer allocator
555  *
556  * When copying data structures from the user (such as a render pass), it's
557  * common to need to allocate data for a bunch of different things.  Instead
558  * of doing several allocations and having to handle all of the error checking
559  * that entails, it can be easier to do a single allocation.  This struct
560  * helps facilitate that.  The intended usage looks like this:
561  *
562  *    ANV_MULTIALLOC(ma)
563  *    anv_multialloc_add(&ma, &main_ptr, 1);
564  *    anv_multialloc_add(&ma, &substruct1, substruct1Count);
565  *    anv_multialloc_add(&ma, &substruct2, substruct2Count);
566  *
567  *    if (!anv_multialloc_alloc(&ma, pAllocator, VK_ALLOCATION_SCOPE_FOO))
568  *       return vk_error(VK_ERROR_OUT_OF_HOST_MEORY);
569  */
570 struct anv_multialloc {
571     size_t size;
572     size_t align;
573 
574     uint32_t ptr_count;
575     void **ptrs[8];
576 };
577 
578 #define ANV_MULTIALLOC_INIT \
579    ((struct anv_multialloc) { 0, })
580 
581 #define ANV_MULTIALLOC(_name) \
582    struct anv_multialloc _name = ANV_MULTIALLOC_INIT
583 
584 __attribute__((always_inline))
585 static inline void
_anv_multialloc_add(struct anv_multialloc * ma,void ** ptr,size_t size,size_t align)586 _anv_multialloc_add(struct anv_multialloc *ma,
587                     void **ptr, size_t size, size_t align)
588 {
589    size_t offset = align_u64(ma->size, align);
590    ma->size = offset + size;
591    ma->align = MAX2(ma->align, align);
592 
593    /* Store the offset in the pointer. */
594    *ptr = (void *)(uintptr_t)offset;
595 
596    assert(ma->ptr_count < ARRAY_SIZE(ma->ptrs));
597    ma->ptrs[ma->ptr_count++] = ptr;
598 }
599 
600 #define anv_multialloc_add_size(_ma, _ptr, _size) \
601    _anv_multialloc_add((_ma), (void **)(_ptr), (_size), __alignof__(**(_ptr)))
602 
603 #define anv_multialloc_add(_ma, _ptr, _count) \
604    anv_multialloc_add_size(_ma, _ptr, (_count) * sizeof(**(_ptr)));
605 
606 __attribute__((always_inline))
607 static inline void *
anv_multialloc_alloc(struct anv_multialloc * ma,const VkAllocationCallbacks * alloc,VkSystemAllocationScope scope)608 anv_multialloc_alloc(struct anv_multialloc *ma,
609                      const VkAllocationCallbacks *alloc,
610                      VkSystemAllocationScope scope)
611 {
612    void *ptr = vk_alloc(alloc, ma->size, ma->align, scope);
613    if (!ptr)
614       return NULL;
615 
616    /* Fill out each of the pointers with their final value.
617     *
618     *   for (uint32_t i = 0; i < ma->ptr_count; i++)
619     *      *ma->ptrs[i] = ptr + (uintptr_t)*ma->ptrs[i];
620     *
621     * Unfortunately, even though ma->ptr_count is basically guaranteed to be a
622     * constant, GCC is incapable of figuring this out and unrolling the loop
623     * so we have to give it a little help.
624     */
625    STATIC_ASSERT(ARRAY_SIZE(ma->ptrs) == 8);
626 #define _ANV_MULTIALLOC_UPDATE_POINTER(_i) \
627    if ((_i) < ma->ptr_count) \
628       *ma->ptrs[_i] = ptr + (uintptr_t)*ma->ptrs[_i]
629    _ANV_MULTIALLOC_UPDATE_POINTER(0);
630    _ANV_MULTIALLOC_UPDATE_POINTER(1);
631    _ANV_MULTIALLOC_UPDATE_POINTER(2);
632    _ANV_MULTIALLOC_UPDATE_POINTER(3);
633    _ANV_MULTIALLOC_UPDATE_POINTER(4);
634    _ANV_MULTIALLOC_UPDATE_POINTER(5);
635    _ANV_MULTIALLOC_UPDATE_POINTER(6);
636    _ANV_MULTIALLOC_UPDATE_POINTER(7);
637 #undef _ANV_MULTIALLOC_UPDATE_POINTER
638 
639    return ptr;
640 }
641 
642 __attribute__((always_inline))
643 static inline void *
anv_multialloc_alloc2(struct anv_multialloc * ma,const VkAllocationCallbacks * parent_alloc,const VkAllocationCallbacks * alloc,VkSystemAllocationScope scope)644 anv_multialloc_alloc2(struct anv_multialloc *ma,
645                       const VkAllocationCallbacks *parent_alloc,
646                       const VkAllocationCallbacks *alloc,
647                       VkSystemAllocationScope scope)
648 {
649    return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
650 }
651 
652 struct anv_bo {
653    uint32_t gem_handle;
654 
655    uint32_t refcount;
656 
657    /* Index into the current validation list.  This is used by the
658     * validation list building alrogithm to track which buffers are already
659     * in the validation list so that we can ensure uniqueness.
660     */
661    uint32_t index;
662 
663    /* Index for use with util_sparse_array_free_list */
664    uint32_t free_index;
665 
666    /* Last known offset.  This value is provided by the kernel when we
667     * execbuf and is used as the presumed offset for the next bunch of
668     * relocations.
669     */
670    uint64_t offset;
671 
672    /** Size of the buffer not including implicit aux */
673    uint64_t size;
674 
675    /* Map for internally mapped BOs.
676     *
677     * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
678     */
679    void *map;
680 
681    /** Size of the implicit CCS range at the end of the buffer
682     *
683     * On Gen12, CCS data is always a direct 1/256 scale-down.  A single 64K
684     * page of main surface data maps to a 256B chunk of CCS data and that
685     * mapping is provided on TGL-LP by the AUX table which maps virtual memory
686     * addresses in the main surface to virtual memory addresses for CCS data.
687     *
688     * Because we can't change these maps around easily and because Vulkan
689     * allows two VkImages to be bound to overlapping memory regions (as long
690     * as the app is careful), it's not feasible to make this mapping part of
691     * the image.  (On Gen11 and earlier, the mapping was provided via
692     * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
693     * Instead, we attach the CCS data directly to the buffer object and setup
694     * the AUX table mapping at BO creation time.
695     *
696     * This field is for internal tracking use by the BO allocator only and
697     * should not be touched by other parts of the code.  If something wants to
698     * know if a BO has implicit CCS data, it should instead look at the
699     * has_implicit_ccs boolean below.
700     *
701     * This data is not included in maps of this buffer.
702     */
703    uint32_t _ccs_size;
704 
705    /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
706    uint32_t flags;
707 
708    /** True if this BO may be shared with other processes */
709    bool is_external:1;
710 
711    /** True if this BO is a wrapper
712     *
713     * When set to true, none of the fields in this BO are meaningful except
714     * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
715     * See also anv_bo_unwrap().  Wrapper BOs are not allowed when use_softpin
716     * is set in the physical device.
717     */
718    bool is_wrapper:1;
719 
720    /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
721    bool has_fixed_address:1;
722 
723    /** True if this BO wraps a host pointer */
724    bool from_host_ptr:1;
725 
726    /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
727    bool has_client_visible_address:1;
728 
729    /** True if this BO has implicit CCS data attached to it */
730    bool has_implicit_ccs:1;
731 };
732 
733 static inline struct anv_bo *
anv_bo_ref(struct anv_bo * bo)734 anv_bo_ref(struct anv_bo *bo)
735 {
736    p_atomic_inc(&bo->refcount);
737    return bo;
738 }
739 
740 static inline struct anv_bo *
anv_bo_unwrap(struct anv_bo * bo)741 anv_bo_unwrap(struct anv_bo *bo)
742 {
743    while (bo->is_wrapper)
744       bo = bo->map;
745    return bo;
746 }
747 
748 /* Represents a lock-free linked list of "free" things.  This is used by
749  * both the block pool and the state pools.  Unfortunately, in order to
750  * solve the ABA problem, we can't use a single uint32_t head.
751  */
752 union anv_free_list {
753    struct {
754       uint32_t offset;
755 
756       /* A simple count that is incremented every time the head changes. */
757       uint32_t count;
758    };
759    /* Make sure it's aligned to 64 bits. This will make atomic operations
760     * faster on 32 bit platforms.
761     */
762    uint64_t u64 __attribute__ ((aligned (8)));
763 };
764 
765 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
766 
767 struct anv_block_state {
768    union {
769       struct {
770          uint32_t next;
771          uint32_t end;
772       };
773       /* Make sure it's aligned to 64 bits. This will make atomic operations
774        * faster on 32 bit platforms.
775        */
776       uint64_t u64 __attribute__ ((aligned (8)));
777    };
778 };
779 
780 #define anv_block_pool_foreach_bo(bo, pool)  \
781    for (struct anv_bo **_pp_bo = (pool)->bos, *bo; \
782         _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
783         _pp_bo++)
784 
785 #define ANV_MAX_BLOCK_POOL_BOS 20
786 
787 struct anv_block_pool {
788    struct anv_device *device;
789    bool use_softpin;
790 
791    /* Wrapper BO for use in relocation lists.  This BO is simply a wrapper
792     * around the actual BO so that we grow the pool after the wrapper BO has
793     * been put in a relocation list.  This is only used in the non-softpin
794     * case.
795     */
796    struct anv_bo wrapper_bo;
797 
798    struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
799    struct anv_bo *bo;
800    uint32_t nbos;
801 
802    uint64_t size;
803 
804    /* The address where the start of the pool is pinned. The various bos that
805     * are created as the pool grows will have addresses in the range
806     * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
807     */
808    uint64_t start_address;
809 
810    /* The offset from the start of the bo to the "center" of the block
811     * pool.  Pointers to allocated blocks are given by
812     * bo.map + center_bo_offset + offsets.
813     */
814    uint32_t center_bo_offset;
815 
816    /* Current memory map of the block pool.  This pointer may or may not
817     * point to the actual beginning of the block pool memory.  If
818     * anv_block_pool_alloc_back has ever been called, then this pointer
819     * will point to the "center" position of the buffer and all offsets
820     * (negative or positive) given out by the block pool alloc functions
821     * will be valid relative to this pointer.
822     *
823     * In particular, map == bo.map + center_offset
824     *
825     * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
826     * since it will handle the softpin case as well, where this points to NULL.
827     */
828    void *map;
829    int fd;
830 
831    /**
832     * Array of mmaps and gem handles owned by the block pool, reclaimed when
833     * the block pool is destroyed.
834     */
835    struct u_vector mmap_cleanups;
836 
837    struct anv_block_state state;
838 
839    struct anv_block_state back_state;
840 };
841 
842 /* Block pools are backed by a fixed-size 1GB memfd */
843 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
844 
845 /* The center of the block pool is also the middle of the memfd.  This may
846  * change in the future if we decide differently for some reason.
847  */
848 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
849 
850 static inline uint32_t
anv_block_pool_size(struct anv_block_pool * pool)851 anv_block_pool_size(struct anv_block_pool *pool)
852 {
853    return pool->state.end + pool->back_state.end;
854 }
855 
856 struct anv_state {
857    int32_t offset;
858    uint32_t alloc_size;
859    void *map;
860    uint32_t idx;
861 };
862 
863 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
864 
865 struct anv_fixed_size_state_pool {
866    union anv_free_list free_list;
867    struct anv_block_state block;
868 };
869 
870 #define ANV_MIN_STATE_SIZE_LOG2 6
871 #define ANV_MAX_STATE_SIZE_LOG2 21
872 
873 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
874 
875 struct anv_free_entry {
876    uint32_t next;
877    struct anv_state state;
878 };
879 
880 struct anv_state_table {
881    struct anv_device *device;
882    int fd;
883    struct anv_free_entry *map;
884    uint32_t size;
885    struct anv_block_state state;
886    struct u_vector cleanups;
887 };
888 
889 struct anv_state_pool {
890    struct anv_block_pool block_pool;
891 
892    /* Offset into the relevant state base address where the state pool starts
893     * allocating memory.
894     */
895    int32_t start_offset;
896 
897    struct anv_state_table table;
898 
899    /* The size of blocks which will be allocated from the block pool */
900    uint32_t block_size;
901 
902    /** Free list for "back" allocations */
903    union anv_free_list back_alloc_free_list;
904 
905    struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
906 };
907 
908 struct anv_state_reserved_pool {
909    struct anv_state_pool *pool;
910    union anv_free_list reserved_blocks;
911    uint32_t count;
912 };
913 
914 struct anv_state_stream {
915    struct anv_state_pool *state_pool;
916 
917    /* The size of blocks to allocate from the state pool */
918    uint32_t block_size;
919 
920    /* Current block we're allocating from */
921    struct anv_state block;
922 
923    /* Offset into the current block at which to allocate the next state */
924    uint32_t next;
925 
926    /* List of all blocks allocated from this pool */
927    struct util_dynarray all_blocks;
928 };
929 
930 /* The block_pool functions exported for testing only.  The block pool should
931  * only be used via a state pool (see below).
932  */
933 VkResult anv_block_pool_init(struct anv_block_pool *pool,
934                              struct anv_device *device,
935                              uint64_t start_address,
936                              uint32_t initial_size);
937 void anv_block_pool_finish(struct anv_block_pool *pool);
938 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
939                              uint32_t block_size, uint32_t *padding);
940 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
941                                   uint32_t block_size);
942 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t
943 size);
944 
945 VkResult anv_state_pool_init(struct anv_state_pool *pool,
946                              struct anv_device *device,
947                              uint64_t base_address,
948                              int32_t start_offset,
949                              uint32_t block_size);
950 void anv_state_pool_finish(struct anv_state_pool *pool);
951 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
952                                       uint32_t state_size, uint32_t alignment);
953 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
954 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
955 void anv_state_stream_init(struct anv_state_stream *stream,
956                            struct anv_state_pool *state_pool,
957                            uint32_t block_size);
958 void anv_state_stream_finish(struct anv_state_stream *stream);
959 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
960                                         uint32_t size, uint32_t alignment);
961 
962 void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
963                                       struct anv_state_pool *parent,
964                                       uint32_t count, uint32_t size,
965                                       uint32_t alignment);
966 void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
967 struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
968 void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
969                                   struct anv_state state);
970 
971 VkResult anv_state_table_init(struct anv_state_table *table,
972                              struct anv_device *device,
973                              uint32_t initial_entries);
974 void anv_state_table_finish(struct anv_state_table *table);
975 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
976                              uint32_t count);
977 void anv_free_list_push(union anv_free_list *list,
978                         struct anv_state_table *table,
979                         uint32_t idx, uint32_t count);
980 struct anv_state* anv_free_list_pop(union anv_free_list *list,
981                                     struct anv_state_table *table);
982 
983 
984 static inline struct anv_state *
anv_state_table_get(struct anv_state_table * table,uint32_t idx)985 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
986 {
987    return &table->map[idx].state;
988 }
989 /**
990  * Implements a pool of re-usable BOs.  The interface is identical to that
991  * of block_pool except that each block is its own BO.
992  */
993 struct anv_bo_pool {
994    struct anv_device *device;
995 
996    struct util_sparse_array_free_list free_list[16];
997 };
998 
999 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device);
1000 void anv_bo_pool_finish(struct anv_bo_pool *pool);
1001 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
1002                            struct anv_bo **bo_out);
1003 void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
1004 
1005 struct anv_scratch_pool {
1006    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
1007    struct anv_bo *bos[16][MESA_SHADER_STAGES];
1008 };
1009 
1010 void anv_scratch_pool_init(struct anv_device *device,
1011                            struct anv_scratch_pool *pool);
1012 void anv_scratch_pool_finish(struct anv_device *device,
1013                              struct anv_scratch_pool *pool);
1014 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
1015                                       struct anv_scratch_pool *pool,
1016                                       gl_shader_stage stage,
1017                                       unsigned per_thread_scratch);
1018 
1019 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
1020 struct anv_bo_cache {
1021    struct util_sparse_array bo_map;
1022    pthread_mutex_t mutex;
1023 };
1024 
1025 VkResult anv_bo_cache_init(struct anv_bo_cache *cache);
1026 void anv_bo_cache_finish(struct anv_bo_cache *cache);
1027 
1028 struct anv_memory_type {
1029    /* Standard bits passed on to the client */
1030    VkMemoryPropertyFlags   propertyFlags;
1031    uint32_t                heapIndex;
1032 };
1033 
1034 struct anv_memory_heap {
1035    /* Standard bits passed on to the client */
1036    VkDeviceSize      size;
1037    VkMemoryHeapFlags flags;
1038 
1039    /** Driver-internal book-keeping.
1040     *
1041     * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
1042     */
1043    VkDeviceSize      used __attribute__ ((aligned (8)));
1044 };
1045 
1046 struct anv_physical_device {
1047     struct vk_object_base                       base;
1048 
1049     /* Link in anv_instance::physical_devices */
1050     struct list_head                            link;
1051 
1052     struct anv_instance *                       instance;
1053     bool                                        no_hw;
1054     char                                        path[20];
1055     const char *                                name;
1056     struct {
1057        uint16_t                                 domain;
1058        uint8_t                                  bus;
1059        uint8_t                                  device;
1060        uint8_t                                  function;
1061     }                                           pci_info;
1062     struct gen_device_info                      info;
1063     /** Amount of "GPU memory" we want to advertise
1064      *
1065      * Clearly, this value is bogus since Intel is a UMA architecture.  On
1066      * gen7 platforms, we are limited by GTT size unless we want to implement
1067      * fine-grained tracking and GTT splitting.  On Broadwell and above we are
1068      * practically unlimited.  However, we will never report more than 3/4 of
1069      * the total system ram to try and avoid running out of RAM.
1070      */
1071     bool                                        supports_48bit_addresses;
1072     struct brw_compiler *                       compiler;
1073     struct isl_device                           isl_dev;
1074     struct gen_perf_config *                    perf;
1075     int                                         cmd_parser_version;
1076     bool                                        has_softpin;
1077     bool                                        has_exec_async;
1078     bool                                        has_exec_capture;
1079     bool                                        has_exec_fence;
1080     bool                                        has_syncobj;
1081     bool                                        has_syncobj_wait;
1082     bool                                        has_context_priority;
1083     bool                                        has_context_isolation;
1084     bool                                        has_mem_available;
1085     bool                                        has_mmap_offset;
1086     uint64_t                                    gtt_size;
1087 
1088     bool                                        use_softpin;
1089     bool                                        always_use_bindless;
1090     bool                                        use_call_secondary;
1091 
1092     /** True if we can access buffers using A64 messages */
1093     bool                                        has_a64_buffer_access;
1094     /** True if we can use bindless access for images */
1095     bool                                        has_bindless_images;
1096     /** True if we can use bindless access for samplers */
1097     bool                                        has_bindless_samplers;
1098 
1099     /** True if we can read the GPU timestamp register
1100      *
1101      * When running in a virtual context, the timestamp register is unreadable
1102      * on Gen12+.
1103      */
1104     bool                                        has_reg_timestamp;
1105 
1106     /** True if this device has implicit AUX
1107      *
1108      * If true, CCS is handled as an implicit attachment to the BO rather than
1109      * as an explicitly bound surface.
1110      */
1111     bool                                        has_implicit_ccs;
1112 
1113     bool                                        always_flush_cache;
1114 
1115     struct anv_device_extension_table           supported_extensions;
1116 
1117     uint32_t                                    eu_total;
1118     uint32_t                                    subslice_total;
1119 
1120     struct {
1121       uint32_t                                  type_count;
1122       struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
1123       uint32_t                                  heap_count;
1124       struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
1125     } memory;
1126 
1127     uint8_t                                     driver_build_sha1[20];
1128     uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
1129     uint8_t                                     driver_uuid[VK_UUID_SIZE];
1130     uint8_t                                     device_uuid[VK_UUID_SIZE];
1131 
1132     struct disk_cache *                         disk_cache;
1133 
1134     struct wsi_device                       wsi_device;
1135     int                                         local_fd;
1136     int                                         master_fd;
1137 };
1138 
1139 struct anv_app_info {
1140    const char*        app_name;
1141    uint32_t           app_version;
1142    const char*        engine_name;
1143    uint32_t           engine_version;
1144    uint32_t           api_version;
1145 };
1146 
1147 struct anv_instance {
1148     struct vk_object_base                       base;
1149 
1150     VkAllocationCallbacks                       alloc;
1151 
1152     struct anv_app_info                         app_info;
1153 
1154     struct anv_instance_extension_table         enabled_extensions;
1155     struct anv_instance_dispatch_table          dispatch;
1156     struct anv_physical_device_dispatch_table   physical_device_dispatch;
1157     struct anv_device_dispatch_table            device_dispatch;
1158 
1159     bool                                        physical_devices_enumerated;
1160     struct list_head                            physical_devices;
1161 
1162     bool                                        pipeline_cache_enabled;
1163 
1164     struct vk_debug_report_instance             debug_report_callbacks;
1165 
1166     struct driOptionCache                       dri_options;
1167     struct driOptionCache                       available_dri_options;
1168 };
1169 
1170 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
1171 void anv_finish_wsi(struct anv_physical_device *physical_device);
1172 
1173 uint32_t anv_physical_device_api_version(struct anv_physical_device *dev);
1174 bool anv_physical_device_extension_supported(struct anv_physical_device *dev,
1175                                              const char *name);
1176 
1177 struct anv_queue_submit {
1178    struct anv_cmd_buffer *                   cmd_buffer;
1179 
1180    uint32_t                                  fence_count;
1181    uint32_t                                  fence_array_length;
1182    struct drm_i915_gem_exec_fence *          fences;
1183 
1184    uint32_t                                  temporary_semaphore_count;
1185    uint32_t                                  temporary_semaphore_array_length;
1186    struct anv_semaphore_impl *               temporary_semaphores;
1187 
1188    /* Semaphores to be signaled with a SYNC_FD. */
1189    struct anv_semaphore **                   sync_fd_semaphores;
1190    uint32_t                                  sync_fd_semaphore_count;
1191    uint32_t                                  sync_fd_semaphore_array_length;
1192 
1193    /* Allocated only with non shareable timelines. */
1194    struct anv_timeline **                    wait_timelines;
1195    uint32_t                                  wait_timeline_count;
1196    uint32_t                                  wait_timeline_array_length;
1197    uint64_t *                                wait_timeline_values;
1198 
1199    struct anv_timeline **                    signal_timelines;
1200    uint32_t                                  signal_timeline_count;
1201    uint32_t                                  signal_timeline_array_length;
1202    uint64_t *                                signal_timeline_values;
1203 
1204    int                                       in_fence;
1205    bool                                      need_out_fence;
1206    int                                       out_fence;
1207 
1208    uint32_t                                  fence_bo_count;
1209    uint32_t                                  fence_bo_array_length;
1210    /* An array of struct anv_bo pointers with lower bit used as a flag to
1211     * signal we will wait on that BO (see anv_(un)pack_ptr).
1212     */
1213    uintptr_t *                               fence_bos;
1214 
1215    int                                       perf_query_pass;
1216 
1217    const VkAllocationCallbacks *             alloc;
1218    VkSystemAllocationScope                   alloc_scope;
1219 
1220    struct anv_bo *                           simple_bo;
1221    uint32_t                                  simple_bo_size;
1222 
1223    struct list_head                          link;
1224 };
1225 
1226 struct anv_queue {
1227     struct vk_object_base                       base;
1228 
1229     struct anv_device *                         device;
1230 
1231     /*
1232      * A list of struct anv_queue_submit to be submitted to i915.
1233      */
1234     struct list_head                            queued_submits;
1235 
1236     VkDeviceQueueCreateFlags                    flags;
1237 };
1238 
1239 struct anv_pipeline_cache {
1240    struct vk_object_base                        base;
1241    struct anv_device *                          device;
1242    pthread_mutex_t                              mutex;
1243 
1244    struct hash_table *                          nir_cache;
1245 
1246    struct hash_table *                          cache;
1247 
1248    bool                                         external_sync;
1249 };
1250 
1251 struct nir_xfb_info;
1252 struct anv_pipeline_bind_map;
1253 
1254 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
1255                              struct anv_device *device,
1256                              bool cache_enabled,
1257                              bool external_sync);
1258 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
1259 
1260 struct anv_shader_bin *
1261 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
1262                           const void *key, uint32_t key_size);
1263 struct anv_shader_bin *
1264 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
1265                                  gl_shader_stage stage,
1266                                  const void *key_data, uint32_t key_size,
1267                                  const void *kernel_data, uint32_t kernel_size,
1268                                  const void *constant_data,
1269                                  uint32_t constant_data_size,
1270                                  const struct brw_stage_prog_data *prog_data,
1271                                  uint32_t prog_data_size,
1272                                  const struct brw_compile_stats *stats,
1273                                  uint32_t num_stats,
1274                                  const struct nir_xfb_info *xfb_info,
1275                                  const struct anv_pipeline_bind_map *bind_map);
1276 
1277 struct anv_shader_bin *
1278 anv_device_search_for_kernel(struct anv_device *device,
1279                              struct anv_pipeline_cache *cache,
1280                              const void *key_data, uint32_t key_size,
1281                              bool *user_cache_bit);
1282 
1283 struct anv_shader_bin *
1284 anv_device_upload_kernel(struct anv_device *device,
1285                          struct anv_pipeline_cache *cache,
1286                          gl_shader_stage stage,
1287                          const void *key_data, uint32_t key_size,
1288                          const void *kernel_data, uint32_t kernel_size,
1289                          const void *constant_data,
1290                          uint32_t constant_data_size,
1291                          const struct brw_stage_prog_data *prog_data,
1292                          uint32_t prog_data_size,
1293                          const struct brw_compile_stats *stats,
1294                          uint32_t num_stats,
1295                          const struct nir_xfb_info *xfb_info,
1296                          const struct anv_pipeline_bind_map *bind_map);
1297 
1298 struct nir_shader;
1299 struct nir_shader_compiler_options;
1300 
1301 struct nir_shader *
1302 anv_device_search_for_nir(struct anv_device *device,
1303                           struct anv_pipeline_cache *cache,
1304                           const struct nir_shader_compiler_options *nir_options,
1305                           unsigned char sha1_key[20],
1306                           void *mem_ctx);
1307 
1308 void
1309 anv_device_upload_nir(struct anv_device *device,
1310                       struct anv_pipeline_cache *cache,
1311                       const struct nir_shader *nir,
1312                       unsigned char sha1_key[20]);
1313 
1314 struct anv_address {
1315    struct anv_bo *bo;
1316    uint32_t offset;
1317 };
1318 
1319 struct anv_device {
1320     struct vk_device                            vk;
1321 
1322     struct anv_physical_device *                physical;
1323     bool                                        no_hw;
1324     struct gen_device_info                      info;
1325     struct isl_device                           isl_dev;
1326     int                                         context_id;
1327     int                                         fd;
1328     bool                                        can_chain_batches;
1329     bool                                        robust_buffer_access;
1330     struct anv_device_extension_table           enabled_extensions;
1331     struct anv_device_dispatch_table            dispatch;
1332 
1333     pthread_mutex_t                             vma_mutex;
1334     struct util_vma_heap                        vma_lo;
1335     struct util_vma_heap                        vma_cva;
1336     struct util_vma_heap                        vma_hi;
1337 
1338     /** List of all anv_device_memory objects */
1339     struct list_head                            memory_objects;
1340 
1341     struct anv_bo_pool                          batch_bo_pool;
1342 
1343     struct anv_bo_cache                         bo_cache;
1344 
1345     struct anv_state_pool                       dynamic_state_pool;
1346     struct anv_state_pool                       instruction_state_pool;
1347     struct anv_state_pool                       binding_table_pool;
1348     struct anv_state_pool                       surface_state_pool;
1349 
1350     struct anv_state_reserved_pool              custom_border_colors;
1351 
1352     /** BO used for various workarounds
1353      *
1354      * There are a number of workarounds on our hardware which require writing
1355      * data somewhere and it doesn't really matter where.  For that, we use
1356      * this BO and just write to the first dword or so.
1357      *
1358      * We also need to be able to handle NULL buffers bound as pushed UBOs.
1359      * For that, we use the high bytes (>= 1024) of the workaround BO.
1360      */
1361     struct anv_bo *                             workaround_bo;
1362     struct anv_address                          workaround_address;
1363 
1364     struct anv_bo *                             trivial_batch_bo;
1365     struct anv_bo *                             hiz_clear_bo;
1366     struct anv_state                            null_surface_state;
1367 
1368     struct anv_pipeline_cache                   default_pipeline_cache;
1369     struct blorp_context                        blorp;
1370 
1371     struct anv_state                            border_colors;
1372 
1373     struct anv_state                            slice_hash;
1374 
1375     struct anv_queue                            queue;
1376 
1377     struct anv_scratch_pool                     scratch_pool;
1378 
1379     pthread_mutex_t                             mutex;
1380     pthread_cond_t                              queue_submit;
1381     int                                         _lost;
1382 
1383     struct gen_batch_decode_ctx                 decoder_ctx;
1384     /*
1385      * When decoding a anv_cmd_buffer, we might need to search for BOs through
1386      * the cmd_buffer's list.
1387      */
1388     struct anv_cmd_buffer                      *cmd_buffer_being_decoded;
1389 
1390     int                                         perf_fd; /* -1 if no opened */
1391     uint64_t                                    perf_metric; /* 0 if unset */
1392 
1393     struct gen_aux_map_context                  *aux_map_ctx;
1394 };
1395 
1396 static inline struct anv_instance *
anv_device_instance_or_null(const struct anv_device * device)1397 anv_device_instance_or_null(const struct anv_device *device)
1398 {
1399    return device ? device->physical->instance : NULL;
1400 }
1401 
1402 static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device * device)1403 anv_binding_table_pool(struct anv_device *device)
1404 {
1405    if (device->physical->use_softpin)
1406       return &device->binding_table_pool;
1407    else
1408       return &device->surface_state_pool;
1409 }
1410 
1411 static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device * device)1412 anv_binding_table_pool_alloc(struct anv_device *device) {
1413    if (device->physical->use_softpin)
1414       return anv_state_pool_alloc(&device->binding_table_pool,
1415                                   device->binding_table_pool.block_size, 0);
1416    else
1417       return anv_state_pool_alloc_back(&device->surface_state_pool);
1418 }
1419 
1420 static inline void
anv_binding_table_pool_free(struct anv_device * device,struct anv_state state)1421 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1422    anv_state_pool_free(anv_binding_table_pool(device), state);
1423 }
1424 
1425 static inline uint32_t
anv_mocs_for_bo(const struct anv_device * device,const struct anv_bo * bo)1426 anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
1427 {
1428    if (bo->is_external)
1429       return device->isl_dev.mocs.external;
1430    else
1431       return device->isl_dev.mocs.internal;
1432 }
1433 
1434 void anv_device_init_blorp(struct anv_device *device);
1435 void anv_device_finish_blorp(struct anv_device *device);
1436 
1437 void _anv_device_set_all_queue_lost(struct anv_device *device);
1438 VkResult _anv_device_set_lost(struct anv_device *device,
1439                               const char *file, int line,
1440                               const char *msg, ...)
1441    anv_printflike(4, 5);
1442 VkResult _anv_queue_set_lost(struct anv_queue *queue,
1443                               const char *file, int line,
1444                               const char *msg, ...)
1445    anv_printflike(4, 5);
1446 #define anv_device_set_lost(dev, ...) \
1447    _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
1448 #define anv_queue_set_lost(queue, ...) \
1449    _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__)
1450 
1451 static inline bool
anv_device_is_lost(struct anv_device * device)1452 anv_device_is_lost(struct anv_device *device)
1453 {
1454    return unlikely(p_atomic_read(&device->_lost));
1455 }
1456 
1457 VkResult anv_device_query_status(struct anv_device *device);
1458 
1459 
1460 enum anv_bo_alloc_flags {
1461    /** Specifies that the BO must have a 32-bit address
1462     *
1463     * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
1464     */
1465    ANV_BO_ALLOC_32BIT_ADDRESS =  (1 << 0),
1466 
1467    /** Specifies that the BO may be shared externally */
1468    ANV_BO_ALLOC_EXTERNAL =       (1 << 1),
1469 
1470    /** Specifies that the BO should be mapped */
1471    ANV_BO_ALLOC_MAPPED =         (1 << 2),
1472 
1473    /** Specifies that the BO should be snooped so we get coherency */
1474    ANV_BO_ALLOC_SNOOPED =        (1 << 3),
1475 
1476    /** Specifies that the BO should be captured in error states */
1477    ANV_BO_ALLOC_CAPTURE =        (1 << 4),
1478 
1479    /** Specifies that the BO will have an address assigned by the caller
1480     *
1481     * Such BOs do not exist in any VMA heap.
1482     */
1483    ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),
1484 
1485    /** Enables implicit synchronization on the BO
1486     *
1487     * This is the opposite of EXEC_OBJECT_ASYNC.
1488     */
1489    ANV_BO_ALLOC_IMPLICIT_SYNC =  (1 << 6),
1490 
1491    /** Enables implicit synchronization on the BO
1492     *
1493     * This is equivalent to EXEC_OBJECT_WRITE.
1494     */
1495    ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),
1496 
1497    /** Has an address which is visible to the client */
1498    ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),
1499 
1500    /** This buffer has implicit CCS data attached to it */
1501    ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),
1502 };
1503 
1504 VkResult anv_device_alloc_bo(struct anv_device *device, uint64_t size,
1505                              enum anv_bo_alloc_flags alloc_flags,
1506                              uint64_t explicit_address,
1507                              struct anv_bo **bo);
1508 VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
1509                                             void *host_ptr, uint32_t size,
1510                                             enum anv_bo_alloc_flags alloc_flags,
1511                                             uint64_t client_address,
1512                                             struct anv_bo **bo_out);
1513 VkResult anv_device_import_bo(struct anv_device *device, int fd,
1514                               enum anv_bo_alloc_flags alloc_flags,
1515                               uint64_t client_address,
1516                               struct anv_bo **bo);
1517 VkResult anv_device_export_bo(struct anv_device *device,
1518                               struct anv_bo *bo, int *fd_out);
1519 void anv_device_release_bo(struct anv_device *device,
1520                            struct anv_bo *bo);
1521 
1522 static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device * device,uint32_t gem_handle)1523 anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
1524 {
1525    return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
1526 }
1527 
1528 VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
1529 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1530                          int64_t timeout);
1531 
1532 VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue);
1533 void anv_queue_finish(struct anv_queue *queue);
1534 
1535 VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
1536 VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
1537                                        struct anv_batch *batch);
1538 
1539 uint64_t anv_gettime_ns(void);
1540 uint64_t anv_get_absolute_timeout(uint64_t timeout);
1541 
1542 void* anv_gem_mmap(struct anv_device *device,
1543                    uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
1544 void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
1545 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
1546 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
1547 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
1548 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
1549 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
1550 int anv_gem_execbuffer(struct anv_device *device,
1551                        struct drm_i915_gem_execbuffer2 *execbuf);
1552 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
1553                        uint32_t stride, uint32_t tiling);
1554 int anv_gem_create_context(struct anv_device *device);
1555 bool anv_gem_has_context_priority(int fd);
1556 int anv_gem_destroy_context(struct anv_device *device, int context);
1557 int anv_gem_set_context_param(int fd, int context, uint32_t param,
1558                               uint64_t value);
1559 int anv_gem_get_context_param(int fd, int context, uint32_t param,
1560                               uint64_t *value);
1561 int anv_gem_get_param(int fd, uint32_t param);
1562 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
1563 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
1564 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
1565                                 uint32_t *active, uint32_t *pending);
1566 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
1567 int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
1568 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
1569 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
1570 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
1571                        uint32_t read_domains, uint32_t write_domain);
1572 int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);
1573 uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
1574 void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
1575 int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
1576 uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
1577 int anv_gem_syncobj_export_sync_file(struct anv_device *device,
1578                                      uint32_t handle);
1579 int anv_gem_syncobj_import_sync_file(struct anv_device *device,
1580                                      uint32_t handle, int fd);
1581 void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
1582 bool anv_gem_supports_syncobj_wait(int fd);
1583 int anv_gem_syncobj_wait(struct anv_device *device,
1584                          uint32_t *handles, uint32_t num_handles,
1585                          int64_t abs_timeout_ns, bool wait_all);
1586 
1587 uint64_t anv_vma_alloc(struct anv_device *device,
1588                        uint64_t size, uint64_t align,
1589                        enum anv_bo_alloc_flags alloc_flags,
1590                        uint64_t client_address);
1591 void anv_vma_free(struct anv_device *device,
1592                   uint64_t address, uint64_t size);
1593 
1594 struct anv_reloc_list {
1595    uint32_t                                     num_relocs;
1596    uint32_t                                     array_length;
1597    struct drm_i915_gem_relocation_entry *       relocs;
1598    struct anv_bo **                             reloc_bos;
1599    uint32_t                                     dep_words;
1600    BITSET_WORD *                                deps;
1601 };
1602 
1603 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
1604                              const VkAllocationCallbacks *alloc);
1605 void anv_reloc_list_finish(struct anv_reloc_list *list,
1606                            const VkAllocationCallbacks *alloc);
1607 
1608 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
1609                             const VkAllocationCallbacks *alloc,
1610                             uint32_t offset, struct anv_bo *target_bo,
1611                             uint32_t delta, uint64_t *address_u64_out);
1612 
1613 struct anv_batch_bo {
1614    /* Link in the anv_cmd_buffer.owned_batch_bos list */
1615    struct list_head                             link;
1616 
1617    struct anv_bo *                              bo;
1618 
1619    /* Bytes actually consumed in this batch BO */
1620    uint32_t                                     length;
1621 
1622    struct anv_reloc_list                        relocs;
1623 };
1624 
1625 struct anv_batch {
1626    const VkAllocationCallbacks *                alloc;
1627 
1628    struct anv_address                           start_addr;
1629 
1630    void *                                       start;
1631    void *                                       end;
1632    void *                                       next;
1633 
1634    struct anv_reloc_list *                      relocs;
1635 
1636    /* This callback is called (with the associated user data) in the event
1637     * that the batch runs out of space.
1638     */
1639    VkResult (*extend_cb)(struct anv_batch *, void *);
1640    void *                                       user_data;
1641 
1642    /**
1643     * Current error status of the command buffer. Used to track inconsistent
1644     * or incomplete command buffer states that are the consequence of run-time
1645     * errors such as out of memory scenarios. We want to track this in the
1646     * batch because the command buffer object is not visible to some parts
1647     * of the driver.
1648     */
1649    VkResult                                     status;
1650 };
1651 
1652 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1653 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1654 uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
1655                               void *location, struct anv_bo *bo, uint32_t offset);
1656 struct anv_address anv_batch_address(struct anv_batch *batch, void *batch_location);
1657 
1658 static inline void
anv_batch_set_storage(struct anv_batch * batch,struct anv_address addr,void * map,size_t size)1659 anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
1660                       void *map, size_t size)
1661 {
1662    batch->start_addr = addr;
1663    batch->next = batch->start = map;
1664    batch->end = map + size;
1665 }
1666 
1667 static inline VkResult
anv_batch_set_error(struct anv_batch * batch,VkResult error)1668 anv_batch_set_error(struct anv_batch *batch, VkResult error)
1669 {
1670    assert(error != VK_SUCCESS);
1671    if (batch->status == VK_SUCCESS)
1672       batch->status = error;
1673    return batch->status;
1674 }
1675 
1676 static inline bool
anv_batch_has_error(struct anv_batch * batch)1677 anv_batch_has_error(struct anv_batch *batch)
1678 {
1679    return batch->status != VK_SUCCESS;
1680 }
1681 
1682 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
1683 
1684 static inline bool
anv_address_is_null(struct anv_address addr)1685 anv_address_is_null(struct anv_address addr)
1686 {
1687    return addr.bo == NULL && addr.offset == 0;
1688 }
1689 
1690 static inline uint64_t
anv_address_physical(struct anv_address addr)1691 anv_address_physical(struct anv_address addr)
1692 {
1693    if (addr.bo && (addr.bo->flags & EXEC_OBJECT_PINNED))
1694       return gen_canonical_address(addr.bo->offset + addr.offset);
1695    else
1696       return gen_canonical_address(addr.offset);
1697 }
1698 
1699 static inline struct anv_address
anv_address_add(struct anv_address addr,uint64_t offset)1700 anv_address_add(struct anv_address addr, uint64_t offset)
1701 {
1702    addr.offset += offset;
1703    return addr;
1704 }
1705 
1706 static inline void
write_reloc(const struct anv_device * device,void * p,uint64_t v,bool flush)1707 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1708 {
1709    unsigned reloc_size = 0;
1710    if (device->info.gen >= 8) {
1711       reloc_size = sizeof(uint64_t);
1712       *(uint64_t *)p = gen_canonical_address(v);
1713    } else {
1714       reloc_size = sizeof(uint32_t);
1715       *(uint32_t *)p = v;
1716    }
1717 
1718    if (flush && !device->info.has_llc)
1719       gen_flush_range(p, reloc_size);
1720 }
1721 
1722 static inline uint64_t
_anv_combine_address(struct anv_batch * batch,void * location,const struct anv_address address,uint32_t delta)1723 _anv_combine_address(struct anv_batch *batch, void *location,
1724                      const struct anv_address address, uint32_t delta)
1725 {
1726    if (address.bo == NULL) {
1727       return address.offset + delta;
1728    } else {
1729       assert(batch->start <= location && location < batch->end);
1730 
1731       return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1732    }
1733 }
1734 
1735 #define __gen_address_type struct anv_address
1736 #define __gen_user_data struct anv_batch
1737 #define __gen_combine_address _anv_combine_address
1738 
1739 /* Wrapper macros needed to work around preprocessor argument issues.  In
1740  * particular, arguments don't get pre-evaluated if they are concatenated.
1741  * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1742  * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1743  * We can work around this easily enough with these helpers.
1744  */
1745 #define __anv_cmd_length(cmd) cmd ## _length
1746 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1747 #define __anv_cmd_header(cmd) cmd ## _header
1748 #define __anv_cmd_pack(cmd) cmd ## _pack
1749 #define __anv_reg_num(reg) reg ## _num
1750 
1751 #define anv_pack_struct(dst, struc, ...) do {                              \
1752       struct struc __template = {                                          \
1753          __VA_ARGS__                                                       \
1754       };                                                                   \
1755       __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
1756       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1757    } while (0)
1758 
1759 #define anv_batch_emitn(batch, n, cmd, ...) ({             \
1760       void *__dst = anv_batch_emit_dwords(batch, n);       \
1761       if (__dst) {                                         \
1762          struct cmd __template = {                         \
1763             __anv_cmd_header(cmd),                         \
1764            .DWordLength = n - __anv_cmd_length_bias(cmd),  \
1765             __VA_ARGS__                                    \
1766          };                                                \
1767          __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
1768       }                                                    \
1769       __dst;                                               \
1770    })
1771 
1772 #define anv_batch_emit_merge(batch, dwords0, dwords1)                   \
1773    do {                                                                 \
1774       uint32_t *dw;                                                     \
1775                                                                         \
1776       STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));        \
1777       dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));         \
1778       if (!dw)                                                          \
1779          break;                                                         \
1780       for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)                \
1781          dw[i] = (dwords0)[i] | (dwords1)[i];                           \
1782       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1783    } while (0)
1784 
1785 #define anv_batch_emit(batch, cmd, name)                            \
1786    for (struct cmd name = { __anv_cmd_header(cmd) },                    \
1787         *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));    \
1788         __builtin_expect(_dst != NULL, 1);                              \
1789         ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                     \
1790            VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1791            _dst = NULL;                                                 \
1792          }))
1793 
1794 /* #define __gen_get_batch_dwords anv_batch_emit_dwords */
1795 /* #define __gen_get_batch_address anv_batch_address */
1796 /* #define __gen_address_value anv_address_physical */
1797 /* #define __gen_address_offset anv_address_add */
1798 
1799 struct anv_device_memory {
1800    struct vk_object_base                        base;
1801 
1802    struct list_head                             link;
1803 
1804    struct anv_bo *                              bo;
1805    struct anv_memory_type *                     type;
1806    VkDeviceSize                                 map_size;
1807    void *                                       map;
1808 
1809    /* If set, we are holding reference to AHardwareBuffer
1810     * which we must release when memory is freed.
1811     */
1812    struct AHardwareBuffer *                     ahw;
1813 
1814    /* If set, this memory comes from a host pointer. */
1815    void *                                       host_ptr;
1816 };
1817 
1818 /**
1819  * Header for Vertex URB Entry (VUE)
1820  */
1821 struct anv_vue_header {
1822    uint32_t Reserved;
1823    uint32_t RTAIndex; /* RenderTargetArrayIndex */
1824    uint32_t ViewportIndex;
1825    float PointWidth;
1826 };
1827 
1828 /** Struct representing a sampled image descriptor
1829  *
1830  * This descriptor layout is used for sampled images, bare sampler, and
1831  * combined image/sampler descriptors.
1832  */
1833 struct anv_sampled_image_descriptor {
1834    /** Bindless image handle
1835     *
1836     * This is expected to already be shifted such that the 20-bit
1837     * SURFACE_STATE table index is in the top 20 bits.
1838     */
1839    uint32_t image;
1840 
1841    /** Bindless sampler handle
1842     *
1843     * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
1844     * to the dynamic state base address.
1845     */
1846    uint32_t sampler;
1847 };
1848 
1849 struct anv_texture_swizzle_descriptor {
1850    /** Texture swizzle
1851     *
1852     * See also nir_intrinsic_channel_select_intel
1853     */
1854    uint8_t swizzle[4];
1855 
1856    /** Unused padding to ensure the struct is a multiple of 64 bits */
1857    uint32_t _pad;
1858 };
1859 
1860 /** Struct representing a storage image descriptor */
1861 struct anv_storage_image_descriptor {
1862    /** Bindless image handles
1863     *
1864     * These are expected to already be shifted such that the 20-bit
1865     * SURFACE_STATE table index is in the top 20 bits.
1866     */
1867    uint32_t read_write;
1868    uint32_t write_only;
1869 };
1870 
1871 /** Struct representing a address/range descriptor
1872  *
1873  * The fields of this struct correspond directly to the data layout of
1874  * nir_address_format_64bit_bounded_global addresses.  The last field is the
1875  * offset in the NIR address so it must be zero so that when you load the
1876  * descriptor you get a pointer to the start of the range.
1877  */
1878 struct anv_address_range_descriptor {
1879    uint64_t address;
1880    uint32_t range;
1881    uint32_t zero;
1882 };
1883 
1884 enum anv_descriptor_data {
1885    /** The descriptor contains a BTI reference to a surface state */
1886    ANV_DESCRIPTOR_SURFACE_STATE  = (1 << 0),
1887    /** The descriptor contains a BTI reference to a sampler state */
1888    ANV_DESCRIPTOR_SAMPLER_STATE  = (1 << 1),
1889    /** The descriptor contains an actual buffer view */
1890    ANV_DESCRIPTOR_BUFFER_VIEW    = (1 << 2),
1891    /** The descriptor contains auxiliary image layout data */
1892    ANV_DESCRIPTOR_IMAGE_PARAM    = (1 << 3),
1893    /** The descriptor contains auxiliary image layout data */
1894    ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
1895    /** anv_address_range_descriptor with a buffer address and range */
1896    ANV_DESCRIPTOR_ADDRESS_RANGE  = (1 << 5),
1897    /** Bindless surface handle */
1898    ANV_DESCRIPTOR_SAMPLED_IMAGE  = (1 << 6),
1899    /** Storage image handles */
1900    ANV_DESCRIPTOR_STORAGE_IMAGE  = (1 << 7),
1901    /** Storage image handles */
1902    ANV_DESCRIPTOR_TEXTURE_SWIZZLE  = (1 << 8),
1903 };
1904 
1905 struct anv_descriptor_set_binding_layout {
1906 #ifndef NDEBUG
1907    /* The type of the descriptors in this binding */
1908    VkDescriptorType type;
1909 #endif
1910 
1911    /* Flags provided when this binding was created */
1912    VkDescriptorBindingFlagsEXT flags;
1913 
1914    /* Bitfield representing the type of data this descriptor contains */
1915    enum anv_descriptor_data data;
1916 
1917    /* Maximum number of YCbCr texture/sampler planes */
1918    uint8_t max_plane_count;
1919 
1920    /* Number of array elements in this binding (or size in bytes for inline
1921     * uniform data)
1922     */
1923    uint16_t array_size;
1924 
1925    /* Index into the flattend descriptor set */
1926    uint16_t descriptor_index;
1927 
1928    /* Index into the dynamic state array for a dynamic buffer */
1929    int16_t dynamic_offset_index;
1930 
1931    /* Index into the descriptor set buffer views */
1932    int16_t buffer_view_index;
1933 
1934    /* Offset into the descriptor buffer where this descriptor lives */
1935    uint32_t descriptor_offset;
1936 
1937    /* Immutable samplers (or NULL if no immutable samplers) */
1938    struct anv_sampler **immutable_samplers;
1939 };
1940 
1941 unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);
1942 
1943 unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
1944                                   VkDescriptorType type);
1945 
1946 bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
1947                                       const struct anv_descriptor_set_binding_layout *binding,
1948                                       bool sampler);
1949 
1950 bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
1951                                       const struct anv_descriptor_set_binding_layout *binding,
1952                                       bool sampler);
1953 
1954 struct anv_descriptor_set_layout {
1955    struct vk_object_base base;
1956 
1957    /* Descriptor set layouts can be destroyed at almost any time */
1958    uint32_t ref_cnt;
1959 
1960    /* Number of bindings in this descriptor set */
1961    uint16_t binding_count;
1962 
1963    /* Total size of the descriptor set with room for all array entries */
1964    uint16_t size;
1965 
1966    /* Shader stages affected by this descriptor set */
1967    uint16_t shader_stages;
1968 
1969    /* Number of buffer views in this descriptor set */
1970    uint16_t buffer_view_count;
1971 
1972    /* Number of dynamic offsets used by this descriptor set */
1973    uint16_t dynamic_offset_count;
1974 
1975    /* For each shader stage, which offsets apply to that stage */
1976    uint16_t stage_dynamic_offsets[MESA_SHADER_STAGES];
1977 
1978    /* Size of the descriptor buffer for this descriptor set */
1979    uint32_t descriptor_buffer_size;
1980 
1981    /* Bindings in this descriptor set */
1982    struct anv_descriptor_set_binding_layout binding[0];
1983 };
1984 
1985 void anv_descriptor_set_layout_destroy(struct anv_device *device,
1986                                        struct anv_descriptor_set_layout *layout);
1987 
1988 static inline void
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout * layout)1989 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1990 {
1991    assert(layout && layout->ref_cnt >= 1);
1992    p_atomic_inc(&layout->ref_cnt);
1993 }
1994 
1995 static inline void
anv_descriptor_set_layout_unref(struct anv_device * device,struct anv_descriptor_set_layout * layout)1996 anv_descriptor_set_layout_unref(struct anv_device *device,
1997                                 struct anv_descriptor_set_layout *layout)
1998 {
1999    assert(layout && layout->ref_cnt >= 1);
2000    if (p_atomic_dec_zero(&layout->ref_cnt))
2001       anv_descriptor_set_layout_destroy(device, layout);
2002 }
2003 
2004 struct anv_descriptor {
2005    VkDescriptorType type;
2006 
2007    union {
2008       struct {
2009          VkImageLayout layout;
2010          struct anv_image_view *image_view;
2011          struct anv_sampler *sampler;
2012       };
2013 
2014       struct {
2015          struct anv_buffer *buffer;
2016          uint64_t offset;
2017          uint64_t range;
2018       };
2019 
2020       struct anv_buffer_view *buffer_view;
2021    };
2022 };
2023 
2024 struct anv_descriptor_set {
2025    struct vk_object_base base;
2026 
2027    struct anv_descriptor_pool *pool;
2028    struct anv_descriptor_set_layout *layout;
2029 
2030    /* Amount of space occupied in the the pool by this descriptor set. It can
2031     * be larger than the size of the descriptor set.
2032     */
2033    uint32_t size;
2034 
2035    /* State relative to anv_descriptor_pool::bo */
2036    struct anv_state desc_mem;
2037    /* Surface state for the descriptor buffer */
2038    struct anv_state desc_surface_state;
2039 
2040    uint32_t buffer_view_count;
2041    struct anv_buffer_view *buffer_views;
2042 
2043    /* Link to descriptor pool's desc_sets list . */
2044    struct list_head pool_link;
2045 
2046    struct anv_descriptor descriptors[0];
2047 };
2048 
2049 struct anv_buffer_view {
2050    struct vk_object_base base;
2051 
2052    enum isl_format format; /**< VkBufferViewCreateInfo::format */
2053    uint64_t range; /**< VkBufferViewCreateInfo::range */
2054 
2055    struct anv_address address;
2056 
2057    struct anv_state surface_state;
2058    struct anv_state storage_surface_state;
2059    struct anv_state writeonly_storage_surface_state;
2060 
2061    struct brw_image_param storage_image_param;
2062 };
2063 
2064 struct anv_push_descriptor_set {
2065    struct anv_descriptor_set set;
2066 
2067    /* Put this field right behind anv_descriptor_set so it fills up the
2068     * descriptors[0] field. */
2069    struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
2070 
2071    /** True if the descriptor set buffer has been referenced by a draw or
2072     * dispatch command.
2073     */
2074    bool set_used_on_gpu;
2075 
2076    struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
2077 };
2078 
2079 struct anv_descriptor_pool {
2080    struct vk_object_base base;
2081 
2082    uint32_t size;
2083    uint32_t next;
2084    uint32_t free_list;
2085 
2086    struct anv_bo *bo;
2087    struct util_vma_heap bo_heap;
2088 
2089    struct anv_state_stream surface_state_stream;
2090    void *surface_state_free_list;
2091 
2092    struct list_head desc_sets;
2093 
2094    char data[0];
2095 };
2096 
2097 enum anv_descriptor_template_entry_type {
2098    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
2099    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
2100    ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
2101 };
2102 
2103 struct anv_descriptor_template_entry {
2104    /* The type of descriptor in this entry */
2105    VkDescriptorType type;
2106 
2107    /* Binding in the descriptor set */
2108    uint32_t binding;
2109 
2110    /* Offset at which to write into the descriptor set binding */
2111    uint32_t array_element;
2112 
2113    /* Number of elements to write into the descriptor set binding */
2114    uint32_t array_count;
2115 
2116    /* Offset into the user provided data */
2117    size_t offset;
2118 
2119    /* Stride between elements into the user provided data */
2120    size_t stride;
2121 };
2122 
2123 struct anv_descriptor_update_template {
2124     struct vk_object_base base;
2125 
2126     VkPipelineBindPoint bind_point;
2127 
2128    /* The descriptor set this template corresponds to. This value is only
2129     * valid if the template was created with the templateType
2130     * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
2131     */
2132    uint8_t set;
2133 
2134    /* Number of entries in this template */
2135    uint32_t entry_count;
2136 
2137    /* Entries of the template */
2138    struct anv_descriptor_template_entry entries[0];
2139 };
2140 
2141 size_t
2142 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout);
2143 
2144 void
2145 anv_descriptor_set_write_image_view(struct anv_device *device,
2146                                     struct anv_descriptor_set *set,
2147                                     const VkDescriptorImageInfo * const info,
2148                                     VkDescriptorType type,
2149                                     uint32_t binding,
2150                                     uint32_t element);
2151 
2152 void
2153 anv_descriptor_set_write_buffer_view(struct anv_device *device,
2154                                      struct anv_descriptor_set *set,
2155                                      VkDescriptorType type,
2156                                      struct anv_buffer_view *buffer_view,
2157                                      uint32_t binding,
2158                                      uint32_t element);
2159 
2160 void
2161 anv_descriptor_set_write_buffer(struct anv_device *device,
2162                                 struct anv_descriptor_set *set,
2163                                 struct anv_state_stream *alloc_stream,
2164                                 VkDescriptorType type,
2165                                 struct anv_buffer *buffer,
2166                                 uint32_t binding,
2167                                 uint32_t element,
2168                                 VkDeviceSize offset,
2169                                 VkDeviceSize range);
2170 void
2171 anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
2172                                              struct anv_descriptor_set *set,
2173                                              uint32_t binding,
2174                                              const void *data,
2175                                              size_t offset,
2176                                              size_t size);
2177 
2178 void
2179 anv_descriptor_set_write_template(struct anv_device *device,
2180                                   struct anv_descriptor_set *set,
2181                                   struct anv_state_stream *alloc_stream,
2182                                   const struct anv_descriptor_update_template *template,
2183                                   const void *data);
2184 
2185 VkResult
2186 anv_descriptor_set_create(struct anv_device *device,
2187                           struct anv_descriptor_pool *pool,
2188                           struct anv_descriptor_set_layout *layout,
2189                           struct anv_descriptor_set **out_set);
2190 
2191 void
2192 anv_descriptor_set_destroy(struct anv_device *device,
2193                            struct anv_descriptor_pool *pool,
2194                            struct anv_descriptor_set *set);
2195 
2196 #define ANV_DESCRIPTOR_SET_NULL             (UINT8_MAX - 5)
2197 #define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS   (UINT8_MAX - 4)
2198 #define ANV_DESCRIPTOR_SET_DESCRIPTORS      (UINT8_MAX - 3)
2199 #define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS  (UINT8_MAX - 2)
2200 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
2201 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
2202 
2203 struct anv_pipeline_binding {
2204    /** Index in the descriptor set
2205     *
2206     * This is a flattened index; the descriptor set layout is already taken
2207     * into account.
2208     */
2209    uint32_t index;
2210 
2211    /** The descriptor set this surface corresponds to.
2212     *
2213     * The special ANV_DESCRIPTOR_SET_* values above indicates that this
2214     * binding is not a normal descriptor set but something else.
2215     */
2216    uint8_t set;
2217 
2218    union {
2219       /** Plane in the binding index for images */
2220       uint8_t plane;
2221 
2222       /** Input attachment index (relative to the subpass) */
2223       uint8_t input_attachment_index;
2224 
2225       /** Dynamic offset index (for dynamic UBOs and SSBOs) */
2226       uint8_t dynamic_offset_index;
2227    };
2228 
2229    /** For a storage image, whether it is write-only */
2230    uint8_t write_only;
2231 
2232    /** Pad to 64 bits so that there are no holes and we can safely memcmp
2233     * assuming POD zero-initialization.
2234     */
2235    uint8_t pad;
2236 };
2237 
2238 struct anv_push_range {
2239    /** Index in the descriptor set */
2240    uint32_t index;
2241 
2242    /** Descriptor set index */
2243    uint8_t set;
2244 
2245    /** Dynamic offset index (for dynamic UBOs) */
2246    uint8_t dynamic_offset_index;
2247 
2248    /** Start offset in units of 32B */
2249    uint8_t start;
2250 
2251    /** Range in units of 32B */
2252    uint8_t length;
2253 };
2254 
2255 struct anv_pipeline_layout {
2256    struct vk_object_base base;
2257 
2258    struct {
2259       struct anv_descriptor_set_layout *layout;
2260       uint32_t dynamic_offset_start;
2261    } set[MAX_SETS];
2262 
2263    uint32_t num_sets;
2264 
2265    unsigned char sha1[20];
2266 };
2267 
2268 struct anv_buffer {
2269    struct vk_object_base                        base;
2270 
2271    struct anv_device *                          device;
2272    VkDeviceSize                                 size;
2273 
2274    VkBufferUsageFlags                           usage;
2275 
2276    /* Set when bound */
2277    struct anv_address                           address;
2278 };
2279 
2280 static inline uint64_t
anv_buffer_get_range(struct anv_buffer * buffer,uint64_t offset,uint64_t range)2281 anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
2282 {
2283    assert(offset <= buffer->size);
2284    if (range == VK_WHOLE_SIZE) {
2285       return buffer->size - offset;
2286    } else {
2287       assert(range + offset >= range);
2288       assert(range + offset <= buffer->size);
2289       return range;
2290    }
2291 }
2292 
2293 enum anv_cmd_dirty_bits {
2294    ANV_CMD_DIRTY_DYNAMIC_VIEWPORT                    = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
2295    ANV_CMD_DIRTY_DYNAMIC_SCISSOR                     = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
2296    ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH                  = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
2297    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS                  = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
2298    ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS             = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
2299    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS                = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
2300    ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK        = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
2301    ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK          = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
2302    ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE           = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
2303    ANV_CMD_DIRTY_PIPELINE                            = 1 << 9,
2304    ANV_CMD_DIRTY_INDEX_BUFFER                        = 1 << 10,
2305    ANV_CMD_DIRTY_RENDER_TARGETS                      = 1 << 11,
2306    ANV_CMD_DIRTY_XFB_ENABLE                          = 1 << 12,
2307    ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE                = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
2308    ANV_CMD_DIRTY_DYNAMIC_CULL_MODE                   = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
2309    ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE                  = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
2310    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY          = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
2311    ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
2312    ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE           = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
2313    ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE          = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
2314    ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP            = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
2315    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE    = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
2316    ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE         = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
2317    ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP                  = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
2318 };
2319 typedef uint32_t anv_cmd_dirty_mask_t;
2320 
2321 #define ANV_CMD_DIRTY_DYNAMIC_ALL                       \
2322    (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |                    \
2323     ANV_CMD_DIRTY_DYNAMIC_SCISSOR |                     \
2324     ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |                  \
2325     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |                  \
2326     ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |             \
2327     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |                \
2328     ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |        \
2329     ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |          \
2330     ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |           \
2331     ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE |                \
2332     ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |                   \
2333     ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |                  \
2334     ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |          \
2335     ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
2336     ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |           \
2337     ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |          \
2338     ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |            \
2339     ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |    \
2340     ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |         \
2341     ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
2342 
2343 static inline enum anv_cmd_dirty_bits
anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)2344 anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
2345 {
2346    switch (vk_state) {
2347    case VK_DYNAMIC_STATE_VIEWPORT:
2348    case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
2349       return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
2350    case VK_DYNAMIC_STATE_SCISSOR:
2351    case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
2352       return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
2353    case VK_DYNAMIC_STATE_LINE_WIDTH:
2354       return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
2355    case VK_DYNAMIC_STATE_DEPTH_BIAS:
2356       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
2357    case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
2358       return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
2359    case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
2360       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
2361    case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
2362       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
2363    case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
2364       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
2365    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
2366       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
2367    case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
2368       return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
2369    case VK_DYNAMIC_STATE_CULL_MODE_EXT:
2370       return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
2371    case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
2372       return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
2373    case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
2374       return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
2375    case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
2376       return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
2377    case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
2378       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
2379    case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
2380       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
2381    case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
2382       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
2383    case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
2384       return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
2385    case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
2386       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
2387    case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
2388       return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
2389    default:
2390       assert(!"Unsupported dynamic state");
2391       return 0;
2392    }
2393 }
2394 
2395 
2396 enum anv_pipe_bits {
2397    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT            = (1 << 0),
2398    ANV_PIPE_STALL_AT_SCOREBOARD_BIT          = (1 << 1),
2399    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT       = (1 << 2),
2400    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT    = (1 << 3),
2401    ANV_PIPE_VF_CACHE_INVALIDATE_BIT          = (1 << 4),
2402    ANV_PIPE_DATA_CACHE_FLUSH_BIT             = (1 << 5),
2403    ANV_PIPE_TILE_CACHE_FLUSH_BIT             = (1 << 6),
2404    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT     = (1 << 10),
2405    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
2406    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT    = (1 << 12),
2407    ANV_PIPE_DEPTH_STALL_BIT                  = (1 << 13),
2408    ANV_PIPE_CS_STALL_BIT                     = (1 << 20),
2409    ANV_PIPE_END_OF_PIPE_SYNC_BIT             = (1 << 21),
2410 
2411    /* This bit does not exist directly in PIPE_CONTROL.  Instead it means that
2412     * a flush has happened but not a CS stall.  The next time we do any sort
2413     * of invalidation we need to insert a CS stall at that time.  Otherwise,
2414     * we would have to CS stall on every flush which could be bad.
2415     */
2416    ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT       = (1 << 22),
2417 
2418    /* This bit does not exist directly in PIPE_CONTROL. It means that render
2419     * target operations related to transfer commands with VkBuffer as
2420     * destination are ongoing. Some operations like copies on the command
2421     * streamer might need to be aware of this to trigger the appropriate stall
2422     * before they can proceed with the copy.
2423     */
2424    ANV_PIPE_RENDER_TARGET_BUFFER_WRITES      = (1 << 23),
2425 
2426    /* This bit does not exist directly in PIPE_CONTROL. It means that Gen12
2427     * AUX-TT data has changed and we need to invalidate AUX-TT data.  This is
2428     * done by writing the AUX-TT register.
2429     */
2430    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT         = (1 << 24),
2431 
2432    /* This bit does not exist directly in PIPE_CONTROL. It means that a
2433     * PIPE_CONTROL with a post-sync operation will follow. This is used to
2434     * implement a workaround for Gen9.
2435     */
2436    ANV_PIPE_POST_SYNC_BIT                    = (1 << 25),
2437 };
2438 
2439 #define ANV_PIPE_FLUSH_BITS ( \
2440    ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
2441    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2442    ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
2443    ANV_PIPE_TILE_CACHE_FLUSH_BIT)
2444 
2445 #define ANV_PIPE_STALL_BITS ( \
2446    ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
2447    ANV_PIPE_DEPTH_STALL_BIT | \
2448    ANV_PIPE_CS_STALL_BIT)
2449 
2450 #define ANV_PIPE_INVALIDATE_BITS ( \
2451    ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
2452    ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
2453    ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
2454    ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
2455    ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
2456    ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
2457    ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)
2458 
2459 static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags)2460 anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags)
2461 {
2462    enum anv_pipe_bits pipe_bits = 0;
2463 
2464    unsigned b;
2465    for_each_bit(b, flags) {
2466       switch ((VkAccessFlagBits)(1 << b)) {
2467       case VK_ACCESS_SHADER_WRITE_BIT:
2468          /* We're transitioning a buffer that was previously used as write
2469           * destination through the data port. To make its content available
2470           * to future operations, flush the data cache.
2471           */
2472          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
2473          break;
2474       case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
2475          /* We're transitioning a buffer that was previously used as render
2476           * target. To make its content available to future operations, flush
2477           * the render target cache.
2478           */
2479          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2480          break;
2481       case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
2482          /* We're transitioning a buffer that was previously used as depth
2483           * buffer. To make its content available to future operations, flush
2484           * the depth cache.
2485           */
2486          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2487          break;
2488       case VK_ACCESS_TRANSFER_WRITE_BIT:
2489          /* We're transitioning a buffer that was previously used as a
2490           * transfer write destination. Generic write operations include color
2491           * & depth operations as well as buffer operations like :
2492           *     - vkCmdClearColorImage()
2493           *     - vkCmdClearDepthStencilImage()
2494           *     - vkCmdBlitImage()
2495           *     - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
2496           *
2497           * Most of these operations are implemented using Blorp which writes
2498           * through the render target, so flush that cache to make it visible
2499           * to future operations. And for depth related operations we also
2500           * need to flush the depth cache.
2501           */
2502          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
2503          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
2504          break;
2505       case VK_ACCESS_MEMORY_WRITE_BIT:
2506          /* We're transitioning a buffer for generic write operations. Flush
2507           * all the caches.
2508           */
2509          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2510          break;
2511       default:
2512          break; /* Nothing to do */
2513       }
2514    }
2515 
2516    return pipe_bits;
2517 }
2518 
2519 static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags flags)2520 anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags flags)
2521 {
2522    enum anv_pipe_bits pipe_bits = 0;
2523 
2524    unsigned b;
2525    for_each_bit(b, flags) {
2526       switch ((VkAccessFlagBits)(1 << b)) {
2527       case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
2528          /* Indirect draw commands take a buffer as input that we're going to
2529           * read from the command streamer to load some of the HW registers
2530           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
2531           * command streamer stall so that all the cache flushes have
2532           * completed before the command streamer loads from memory.
2533           */
2534          pipe_bits |=  ANV_PIPE_CS_STALL_BIT;
2535          /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
2536           * through a vertex buffer, so invalidate that cache.
2537           */
2538          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2539          /* For CmdDipatchIndirect, we also load gl_NumWorkGroups through a
2540           * UBO from the buffer, so we need to invalidate constant cache.
2541           */
2542          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2543          break;
2544       case VK_ACCESS_INDEX_READ_BIT:
2545       case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
2546          /* We transitioning a buffer to be used for as input for vkCmdDraw*
2547           * commands, so we invalidate the VF cache to make sure there is no
2548           * stale data when we start rendering.
2549           */
2550          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
2551          break;
2552       case VK_ACCESS_UNIFORM_READ_BIT:
2553          /* We transitioning a buffer to be used as uniform data. Because
2554           * uniform is accessed through the data port & sampler, we need to
2555           * invalidate the texture cache (sampler) & constant cache (data
2556           * port) to avoid stale data.
2557           */
2558          pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
2559          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2560          break;
2561       case VK_ACCESS_SHADER_READ_BIT:
2562       case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
2563       case VK_ACCESS_TRANSFER_READ_BIT:
2564          /* Transitioning a buffer to be read through the sampler, so
2565           * invalidate the texture cache, we don't want any stale data.
2566           */
2567          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
2568          break;
2569       case VK_ACCESS_MEMORY_READ_BIT:
2570          /* Transitioning a buffer for generic read, invalidate all the
2571           * caches.
2572           */
2573          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
2574          break;
2575       case VK_ACCESS_MEMORY_WRITE_BIT:
2576          /* Generic write, make sure all previously written things land in
2577           * memory.
2578           */
2579          pipe_bits |= ANV_PIPE_FLUSH_BITS;
2580          break;
2581       case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
2582          /* Transitioning a buffer for conditional rendering. We'll load the
2583           * content of this buffer into HW registers using the command
2584           * streamer, so we need to stall the command streamer to make sure
2585           * any in-flight flush operations have completed.
2586           */
2587          pipe_bits |= ANV_PIPE_CS_STALL_BIT;
2588          break;
2589       default:
2590          break; /* Nothing to do */
2591       }
2592    }
2593 
2594    return pipe_bits;
2595 }
2596 
2597 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV (         \
2598    VK_IMAGE_ASPECT_COLOR_BIT | \
2599    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2600    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2601    VK_IMAGE_ASPECT_PLANE_2_BIT)
2602 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
2603    VK_IMAGE_ASPECT_PLANE_0_BIT | \
2604    VK_IMAGE_ASPECT_PLANE_1_BIT | \
2605    VK_IMAGE_ASPECT_PLANE_2_BIT)
2606 
2607 struct anv_vertex_binding {
2608    struct anv_buffer *                          buffer;
2609    VkDeviceSize                                 offset;
2610    VkDeviceSize                                 stride;
2611    VkDeviceSize                                 size;
2612 };
2613 
2614 struct anv_xfb_binding {
2615    struct anv_buffer *                          buffer;
2616    VkDeviceSize                                 offset;
2617    VkDeviceSize                                 size;
2618 };
2619 
2620 struct anv_push_constants {
2621    /** Push constant data provided by the client through vkPushConstants */
2622    uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
2623 
2624    /** Dynamic offsets for dynamic UBOs and SSBOs */
2625    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2626 
2627    uint64_t push_reg_mask;
2628 
2629    /** Pad out to a multiple of 32 bytes */
2630    uint32_t pad[2];
2631 
2632    struct {
2633       /** Base workgroup ID
2634        *
2635        * Used for vkCmdDispatchBase.
2636        */
2637       uint32_t base_work_group_id[3];
2638 
2639       /** Subgroup ID
2640        *
2641        * This is never set by software but is implicitly filled out when
2642        * uploading the push constants for compute shaders.
2643        */
2644       uint32_t subgroup_id;
2645    } cs;
2646 };
2647 
2648 struct anv_dynamic_state {
2649    struct {
2650       uint32_t                                  count;
2651       VkViewport                                viewports[MAX_VIEWPORTS];
2652    } viewport;
2653 
2654    struct {
2655       uint32_t                                  count;
2656       VkRect2D                                  scissors[MAX_SCISSORS];
2657    } scissor;
2658 
2659    float                                        line_width;
2660 
2661    struct {
2662       float                                     bias;
2663       float                                     clamp;
2664       float                                     slope;
2665    } depth_bias;
2666 
2667    float                                        blend_constants[4];
2668 
2669    struct {
2670       float                                     min;
2671       float                                     max;
2672    } depth_bounds;
2673 
2674    struct {
2675       uint32_t                                  front;
2676       uint32_t                                  back;
2677    } stencil_compare_mask;
2678 
2679    struct {
2680       uint32_t                                  front;
2681       uint32_t                                  back;
2682    } stencil_write_mask;
2683 
2684    struct {
2685       uint32_t                                  front;
2686       uint32_t                                  back;
2687    } stencil_reference;
2688 
2689    struct {
2690       struct {
2691          VkStencilOp fail_op;
2692          VkStencilOp pass_op;
2693          VkStencilOp depth_fail_op;
2694          VkCompareOp compare_op;
2695       } front;
2696       struct {
2697          VkStencilOp fail_op;
2698          VkStencilOp pass_op;
2699          VkStencilOp depth_fail_op;
2700          VkCompareOp compare_op;
2701       } back;
2702    } stencil_op;
2703 
2704    struct {
2705       uint32_t                                  factor;
2706       uint16_t                                  pattern;
2707    } line_stipple;
2708 
2709    VkCullModeFlags                              cull_mode;
2710    VkFrontFace                                  front_face;
2711    VkPrimitiveTopology                          primitive_topology;
2712    bool                                         depth_test_enable;
2713    bool                                         depth_write_enable;
2714    VkCompareOp                                  depth_compare_op;
2715    bool                                         depth_bounds_test_enable;
2716    bool                                         stencil_test_enable;
2717    bool                                         dyn_vbo_stride;
2718    bool                                         dyn_vbo_size;
2719 };
2720 
2721 extern const struct anv_dynamic_state default_dynamic_state;
2722 
2723 uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
2724                                 const struct anv_dynamic_state *src,
2725                                 uint32_t copy_mask);
2726 
2727 struct anv_surface_state {
2728    struct anv_state state;
2729    /** Address of the surface referred to by this state
2730     *
2731     * This address is relative to the start of the BO.
2732     */
2733    struct anv_address address;
2734    /* Address of the aux surface, if any
2735     *
2736     * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2737     *
2738     * With the exception of gen8, the bottom 12 bits of this address' offset
2739     * include extra aux information.
2740     */
2741    struct anv_address aux_address;
2742    /* Address of the clear color, if any
2743     *
2744     * This address is relative to the start of the BO.
2745     */
2746    struct anv_address clear_address;
2747 };
2748 
2749 /**
2750  * Attachment state when recording a renderpass instance.
2751  *
2752  * The clear value is valid only if there exists a pending clear.
2753  */
2754 struct anv_attachment_state {
2755    enum isl_aux_usage                           aux_usage;
2756    struct anv_surface_state                     color;
2757    struct anv_surface_state                     input;
2758 
2759    VkImageLayout                                current_layout;
2760    VkImageLayout                                current_stencil_layout;
2761    VkImageAspectFlags                           pending_clear_aspects;
2762    VkImageAspectFlags                           pending_load_aspects;
2763    bool                                         fast_clear;
2764    VkClearValue                                 clear_value;
2765 
2766    /* When multiview is active, attachments with a renderpass clear
2767     * operation have their respective layers cleared on the first
2768     * subpass that uses them, and only in that subpass. We keep track
2769     * of this using a bitfield to indicate which layers of an attachment
2770     * have not been cleared yet when multiview is active.
2771     */
2772    uint32_t                                     pending_clear_views;
2773    struct anv_image_view *                      image_view;
2774 };
2775 
2776 /** State tracking for vertex buffer flushes
2777  *
2778  * On Gen8-9, the VF cache only considers the bottom 32 bits of memory
2779  * addresses.  If you happen to have two vertex buffers which get placed
2780  * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
2781  * collisions.  In order to solve this problem, we track vertex address ranges
2782  * which are live in the cache and invalidate the cache if one ever exceeds 32
2783  * bits.
2784  */
2785 struct anv_vb_cache_range {
2786    /* Virtual address at which the live vertex buffer cache range starts for
2787     * this vertex buffer index.
2788     */
2789    uint64_t start;
2790 
2791    /* Virtual address of the byte after where vertex buffer cache range ends.
2792     * This is exclusive such that end - start is the size of the range.
2793     */
2794    uint64_t end;
2795 };
2796 
2797 /** State tracking for particular pipeline bind point
2798  *
2799  * This struct is the base struct for anv_cmd_graphics_state and
2800  * anv_cmd_compute_state.  These are used to track state which is bound to a
2801  * particular type of pipeline.  Generic state that applies per-stage such as
2802  * binding table offsets and push constants is tracked generically with a
2803  * per-stage array in anv_cmd_state.
2804  */
2805 struct anv_cmd_pipeline_state {
2806    struct anv_descriptor_set *descriptors[MAX_SETS];
2807    struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2808 };
2809 
2810 /** State tracking for graphics pipeline
2811  *
2812  * This has anv_cmd_pipeline_state as a base struct to track things which get
2813  * bound to a graphics pipeline.  Along with general pipeline bind point state
2814  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2815  * state which is graphics-specific.
2816  */
2817 struct anv_cmd_graphics_state {
2818    struct anv_cmd_pipeline_state base;
2819 
2820    struct anv_graphics_pipeline *pipeline;
2821 
2822    anv_cmd_dirty_mask_t dirty;
2823    uint32_t vb_dirty;
2824 
2825    struct anv_vb_cache_range ib_bound_range;
2826    struct anv_vb_cache_range ib_dirty_range;
2827    struct anv_vb_cache_range vb_bound_ranges[33];
2828    struct anv_vb_cache_range vb_dirty_ranges[33];
2829 
2830    struct anv_dynamic_state dynamic;
2831 
2832    uint32_t primitive_topology;
2833 
2834    struct {
2835       struct anv_buffer *index_buffer;
2836       uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2837       uint32_t index_offset;
2838    } gen7;
2839 };
2840 
2841 /** State tracking for compute pipeline
2842  *
2843  * This has anv_cmd_pipeline_state as a base struct to track things which get
2844  * bound to a compute pipeline.  Along with general pipeline bind point state
2845  * which is in the anv_cmd_pipeline_state base struct, it also contains other
2846  * state which is compute-specific.
2847  */
2848 struct anv_cmd_compute_state {
2849    struct anv_cmd_pipeline_state base;
2850 
2851    struct anv_compute_pipeline *pipeline;
2852 
2853    bool pipeline_dirty;
2854 
2855    struct anv_address num_workgroups;
2856 };
2857 
2858 /** State required while building cmd buffer */
2859 struct anv_cmd_state {
2860    /* PIPELINE_SELECT.PipelineSelection */
2861    uint32_t                                     current_pipeline;
2862    const struct gen_l3_config *                 current_l3_config;
2863    uint32_t                                     last_aux_map_state;
2864 
2865    struct anv_cmd_graphics_state                gfx;
2866    struct anv_cmd_compute_state                 compute;
2867 
2868    enum anv_pipe_bits                           pending_pipe_bits;
2869    VkShaderStageFlags                           descriptors_dirty;
2870    VkShaderStageFlags                           push_constants_dirty;
2871 
2872    struct anv_framebuffer *                     framebuffer;
2873    struct anv_render_pass *                     pass;
2874    struct anv_subpass *                         subpass;
2875    VkRect2D                                     render_area;
2876    uint32_t                                     restart_index;
2877    struct anv_vertex_binding                    vertex_bindings[MAX_VBS];
2878    bool                                         xfb_enabled;
2879    struct anv_xfb_binding                       xfb_bindings[MAX_XFB_BUFFERS];
2880    VkShaderStageFlags                           push_constant_stages;
2881    struct anv_push_constants                    push_constants[MESA_SHADER_STAGES];
2882    struct anv_state                             binding_tables[MESA_SHADER_STAGES];
2883    struct anv_state                             samplers[MESA_SHADER_STAGES];
2884 
2885    unsigned char                                sampler_sha1s[MESA_SHADER_STAGES][20];
2886    unsigned char                                surface_sha1s[MESA_SHADER_STAGES][20];
2887    unsigned char                                push_sha1s[MESA_SHADER_STAGES][20];
2888 
2889    /**
2890     * Whether or not the gen8 PMA fix is enabled.  We ensure that, at the top
2891     * of any command buffer it is disabled by disabling it in EndCommandBuffer
2892     * and before invoking the secondary in ExecuteCommands.
2893     */
2894    bool                                         pma_fix_enabled;
2895 
2896    /**
2897     * Whether or not we know for certain that HiZ is enabled for the current
2898     * subpass.  If, for whatever reason, we are unsure as to whether HiZ is
2899     * enabled or not, this will be false.
2900     */
2901    bool                                         hiz_enabled;
2902 
2903    bool                                         conditional_render_enabled;
2904 
2905    /**
2906     * Last rendering scale argument provided to
2907     * genX(cmd_buffer_emit_hashing_mode)().
2908     */
2909    unsigned                                     current_hash_scale;
2910 
2911    /**
2912     * Array length is anv_cmd_state::pass::attachment_count. Array content is
2913     * valid only when recording a render pass instance.
2914     */
2915    struct anv_attachment_state *                attachments;
2916 
2917    /**
2918     * Surface states for color render targets.  These are stored in a single
2919     * flat array.  For depth-stencil attachments, the surface state is simply
2920     * left blank.
2921     */
2922    struct anv_state                             attachment_states;
2923 
2924    /**
2925     * A null surface state of the right size to match the framebuffer.  This
2926     * is one of the states in attachment_states.
2927     */
2928    struct anv_state                             null_surface_state;
2929 };
2930 
2931 struct anv_cmd_pool {
2932    struct vk_object_base                        base;
2933    VkAllocationCallbacks                        alloc;
2934    struct list_head                             cmd_buffers;
2935 };
2936 
2937 #define ANV_CMD_BUFFER_BATCH_SIZE 8192
2938 
2939 enum anv_cmd_buffer_exec_mode {
2940    ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
2941    ANV_CMD_BUFFER_EXEC_MODE_EMIT,
2942    ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
2943    ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
2944    ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
2945    ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
2946 };
2947 
2948 struct anv_cmd_buffer {
2949    struct vk_object_base                        base;
2950 
2951    struct anv_device *                          device;
2952 
2953    struct anv_cmd_pool *                        pool;
2954    struct list_head                             pool_link;
2955 
2956    struct anv_batch                             batch;
2957 
2958    /* Fields required for the actual chain of anv_batch_bo's.
2959     *
2960     * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
2961     */
2962    struct list_head                             batch_bos;
2963    enum anv_cmd_buffer_exec_mode                exec_mode;
2964 
2965    /* A vector of anv_batch_bo pointers for every batch or surface buffer
2966     * referenced by this command buffer
2967     *
2968     * initialized by anv_cmd_buffer_init_batch_bo_chain()
2969     */
2970    struct u_vector                            seen_bbos;
2971 
2972    /* A vector of int32_t's for every block of binding tables.
2973     *
2974     * initialized by anv_cmd_buffer_init_batch_bo_chain()
2975     */
2976    struct u_vector                              bt_block_states;
2977    struct anv_state                             bt_next;
2978 
2979    struct anv_reloc_list                        surface_relocs;
2980    /** Last seen surface state block pool center bo offset */
2981    uint32_t                                     last_ss_pool_center;
2982 
2983    /* Serial for tracking buffer completion */
2984    uint32_t                                     serial;
2985 
2986    /* Stream objects for storing temporary data */
2987    struct anv_state_stream                      surface_state_stream;
2988    struct anv_state_stream                      dynamic_state_stream;
2989 
2990    VkCommandBufferUsageFlags                    usage_flags;
2991    VkCommandBufferLevel                         level;
2992 
2993    struct anv_query_pool                       *perf_query_pool;
2994 
2995    struct anv_cmd_state                         state;
2996 
2997    struct anv_address                           return_addr;
2998 
2999    /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
3000    uint64_t                                     intel_perf_marker;
3001 };
3002 
3003 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3004 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3005 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
3006 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
3007 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
3008                                   struct anv_cmd_buffer *secondary);
3009 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
3010 VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
3011                                 struct anv_cmd_buffer *cmd_buffer,
3012                                 const VkSemaphore *in_semaphores,
3013                                 const uint64_t *in_wait_values,
3014                                 uint32_t num_in_semaphores,
3015                                 const VkSemaphore *out_semaphores,
3016                                 const uint64_t *out_signal_values,
3017                                 uint32_t num_out_semaphores,
3018                                 VkFence fence,
3019                                 int perf_query_pass);
3020 
3021 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
3022 
3023 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3024                                              const void *data, uint32_t size, uint32_t alignment);
3025 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3026                                               uint32_t *a, uint32_t *b,
3027                                               uint32_t dwords, uint32_t alignment);
3028 
3029 struct anv_address
3030 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
3031 struct anv_state
3032 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
3033                                    uint32_t entries, uint32_t *state_offset);
3034 struct anv_state
3035 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
3036 struct anv_state
3037 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
3038                                    uint32_t size, uint32_t alignment);
3039 
3040 VkResult
3041 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
3042 
3043 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
3044 void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
3045                                          bool depth_clamp_enable);
3046 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
3047 
3048 void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
3049                                       struct anv_render_pass *pass,
3050                                       struct anv_framebuffer *framebuffer,
3051                                       const VkClearValue *clear_values);
3052 
3053 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
3054 
3055 struct anv_state
3056 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
3057                               gl_shader_stage stage);
3058 struct anv_state
3059 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
3060 
3061 const struct anv_image_view *
3062 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
3063 
3064 VkResult
3065 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
3066                                          uint32_t num_entries,
3067                                          uint32_t *state_offset,
3068                                          struct anv_state *bt_state);
3069 
3070 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
3071 
3072 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
3073 
3074 enum anv_fence_type {
3075    ANV_FENCE_TYPE_NONE = 0,
3076    ANV_FENCE_TYPE_BO,
3077    ANV_FENCE_TYPE_WSI_BO,
3078    ANV_FENCE_TYPE_SYNCOBJ,
3079    ANV_FENCE_TYPE_WSI,
3080 };
3081 
3082 enum anv_bo_fence_state {
3083    /** Indicates that this is a new (or newly reset fence) */
3084    ANV_BO_FENCE_STATE_RESET,
3085 
3086    /** Indicates that this fence has been submitted to the GPU but is still
3087     * (as far as we know) in use by the GPU.
3088     */
3089    ANV_BO_FENCE_STATE_SUBMITTED,
3090 
3091    ANV_BO_FENCE_STATE_SIGNALED,
3092 };
3093 
3094 struct anv_fence_impl {
3095    enum anv_fence_type type;
3096 
3097    union {
3098       /** Fence implementation for BO fences
3099        *
3100        * These fences use a BO and a set of CPU-tracked state flags.  The BO
3101        * is added to the object list of the last execbuf call in a QueueSubmit
3102        * and is marked EXEC_WRITE.  The state flags track when the BO has been
3103        * submitted to the kernel.  We need to do this because Vulkan lets you
3104        * wait on a fence that has not yet been submitted and I915_GEM_BUSY
3105        * will say it's idle in this case.
3106        */
3107       struct {
3108          struct anv_bo *bo;
3109          enum anv_bo_fence_state state;
3110       } bo;
3111 
3112       /** DRM syncobj handle for syncobj-based fences */
3113       uint32_t syncobj;
3114 
3115       /** WSI fence */
3116       struct wsi_fence *fence_wsi;
3117    };
3118 };
3119 
3120 struct anv_fence {
3121    struct vk_object_base base;
3122 
3123    /* Permanent fence state.  Every fence has some form of permanent state
3124     * (type != ANV_SEMAPHORE_TYPE_NONE).  This may be a BO to fence on (for
3125     * cross-process fences) or it could just be a dummy for use internally.
3126     */
3127    struct anv_fence_impl permanent;
3128 
3129    /* Temporary fence state.  A fence *may* have temporary state.  That state
3130     * is added to the fence by an import operation and is reset back to
3131     * ANV_SEMAPHORE_TYPE_NONE when the fence is reset.  A fence with temporary
3132     * state cannot be signaled because the fence must already be signaled
3133     * before the temporary state can be exported from the fence in the other
3134     * process and imported here.
3135     */
3136    struct anv_fence_impl temporary;
3137 };
3138 
3139 void anv_fence_reset_temporary(struct anv_device *device,
3140                                struct anv_fence *fence);
3141 
3142 struct anv_event {
3143    struct vk_object_base                        base;
3144    uint64_t                                     semaphore;
3145    struct anv_state                             state;
3146 };
3147 
3148 enum anv_semaphore_type {
3149    ANV_SEMAPHORE_TYPE_NONE = 0,
3150    ANV_SEMAPHORE_TYPE_DUMMY,
3151    ANV_SEMAPHORE_TYPE_BO,
3152    ANV_SEMAPHORE_TYPE_WSI_BO,
3153    ANV_SEMAPHORE_TYPE_SYNC_FILE,
3154    ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
3155    ANV_SEMAPHORE_TYPE_TIMELINE,
3156 };
3157 
3158 struct anv_timeline_point {
3159    struct list_head link;
3160 
3161    uint64_t serial;
3162 
3163    /* Number of waiter on this point, when > 0 the point should not be garbage
3164     * collected.
3165     */
3166    int waiting;
3167 
3168    /* BO used for synchronization. */
3169    struct anv_bo *bo;
3170 };
3171 
3172 struct anv_timeline {
3173    pthread_mutex_t mutex;
3174    pthread_cond_t  cond;
3175 
3176    uint64_t highest_past;
3177    uint64_t highest_pending;
3178 
3179    struct list_head points;
3180    struct list_head free_points;
3181 };
3182 
3183 struct anv_semaphore_impl {
3184    enum anv_semaphore_type type;
3185 
3186    union {
3187       /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
3188        * or type == ANV_SEMAPHORE_TYPE_WSI_BO.  This BO will be added to the
3189        * object list on any execbuf2 calls for which this semaphore is used as
3190        * a wait or signal fence.  When used as a signal fence or when type ==
3191        * ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
3192        */
3193       struct anv_bo *bo;
3194 
3195       /* The sync file descriptor when type == ANV_SEMAPHORE_TYPE_SYNC_FILE.
3196        * If the semaphore is in the unsignaled state due to either just being
3197        * created or because it has been used for a wait, fd will be -1.
3198        */
3199       int fd;
3200 
3201       /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
3202        * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
3203        * import so we don't need to bother with a userspace cache.
3204        */
3205       uint32_t syncobj;
3206 
3207       /* Non shareable timeline semaphore
3208        *
3209        * Used when kernel don't have support for timeline semaphores.
3210        */
3211       struct anv_timeline timeline;
3212    };
3213 };
3214 
3215 struct anv_semaphore {
3216    struct vk_object_base base;
3217 
3218    uint32_t refcount;
3219 
3220    /* Permanent semaphore state.  Every semaphore has some form of permanent
3221     * state (type != ANV_SEMAPHORE_TYPE_NONE).  This may be a BO to fence on
3222     * (for cross-process semaphores0 or it could just be a dummy for use
3223     * internally.
3224     */
3225    struct anv_semaphore_impl permanent;
3226 
3227    /* Temporary semaphore state.  A semaphore *may* have temporary state.
3228     * That state is added to the semaphore by an import operation and is reset
3229     * back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on.  A
3230     * semaphore with temporary state cannot be signaled because the semaphore
3231     * must already be signaled before the temporary state can be exported from
3232     * the semaphore in the other process and imported here.
3233     */
3234    struct anv_semaphore_impl temporary;
3235 };
3236 
3237 void anv_semaphore_reset_temporary(struct anv_device *device,
3238                                    struct anv_semaphore *semaphore);
3239 
3240 struct anv_shader_module {
3241    struct vk_object_base                        base;
3242 
3243    unsigned char                                sha1[20];
3244    uint32_t                                     size;
3245    char                                         data[0];
3246 };
3247 
3248 static inline gl_shader_stage
vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)3249 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
3250 {
3251    assert(__builtin_popcount(vk_stage) == 1);
3252    return ffs(vk_stage) - 1;
3253 }
3254 
3255 static inline VkShaderStageFlagBits
mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)3256 mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
3257 {
3258    return (1 << mesa_stage);
3259 }
3260 
3261 #define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
3262 
3263 #define anv_foreach_stage(stage, stage_bits)                         \
3264    for (gl_shader_stage stage,                                       \
3265         __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK);    \
3266         stage = __builtin_ffs(__tmp) - 1, __tmp;                     \
3267         __tmp &= ~(1 << (stage)))
3268 
3269 struct anv_pipeline_bind_map {
3270    unsigned char                                surface_sha1[20];
3271    unsigned char                                sampler_sha1[20];
3272    unsigned char                                push_sha1[20];
3273 
3274    uint32_t surface_count;
3275    uint32_t sampler_count;
3276 
3277    struct anv_pipeline_binding *                surface_to_descriptor;
3278    struct anv_pipeline_binding *                sampler_to_descriptor;
3279 
3280    struct anv_push_range                        push_ranges[4];
3281 };
3282 
3283 struct anv_shader_bin_key {
3284    uint32_t size;
3285    uint8_t data[0];
3286 };
3287 
3288 struct anv_shader_bin {
3289    uint32_t ref_cnt;
3290 
3291    gl_shader_stage stage;
3292 
3293    const struct anv_shader_bin_key *key;
3294 
3295    struct anv_state kernel;
3296    uint32_t kernel_size;
3297 
3298    struct anv_state constant_data;
3299    uint32_t constant_data_size;
3300 
3301    const struct brw_stage_prog_data *prog_data;
3302    uint32_t prog_data_size;
3303 
3304    struct brw_compile_stats stats[3];
3305    uint32_t num_stats;
3306 
3307    struct nir_xfb_info *xfb_info;
3308 
3309    struct anv_pipeline_bind_map bind_map;
3310 };
3311 
3312 struct anv_shader_bin *
3313 anv_shader_bin_create(struct anv_device *device,
3314                       gl_shader_stage stage,
3315                       const void *key, uint32_t key_size,
3316                       const void *kernel, uint32_t kernel_size,
3317                       const void *constant_data, uint32_t constant_data_size,
3318                       const struct brw_stage_prog_data *prog_data,
3319                       uint32_t prog_data_size,
3320                       const struct brw_compile_stats *stats, uint32_t num_stats,
3321                       const struct nir_xfb_info *xfb_info,
3322                       const struct anv_pipeline_bind_map *bind_map);
3323 
3324 void
3325 anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
3326 
3327 static inline void
anv_shader_bin_ref(struct anv_shader_bin * shader)3328 anv_shader_bin_ref(struct anv_shader_bin *shader)
3329 {
3330    assert(shader && shader->ref_cnt >= 1);
3331    p_atomic_inc(&shader->ref_cnt);
3332 }
3333 
3334 static inline void
anv_shader_bin_unref(struct anv_device * device,struct anv_shader_bin * shader)3335 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
3336 {
3337    assert(shader && shader->ref_cnt >= 1);
3338    if (p_atomic_dec_zero(&shader->ref_cnt))
3339       anv_shader_bin_destroy(device, shader);
3340 }
3341 
3342 struct anv_pipeline_executable {
3343    gl_shader_stage stage;
3344 
3345    struct brw_compile_stats stats;
3346 
3347    char *nir;
3348    char *disasm;
3349 };
3350 
3351 enum anv_pipeline_type {
3352    ANV_PIPELINE_GRAPHICS,
3353    ANV_PIPELINE_COMPUTE,
3354 };
3355 
3356 struct anv_pipeline {
3357    struct vk_object_base                        base;
3358 
3359    struct anv_device *                          device;
3360 
3361    struct anv_batch                             batch;
3362    struct anv_reloc_list                        batch_relocs;
3363 
3364    void *                                       mem_ctx;
3365 
3366    enum anv_pipeline_type                       type;
3367    VkPipelineCreateFlags                        flags;
3368 
3369    struct util_dynarray                         executables;
3370 
3371    const struct gen_l3_config *                 l3_config;
3372 };
3373 
3374 struct anv_graphics_pipeline {
3375    struct anv_pipeline                          base;
3376 
3377    uint32_t                                     batch_data[512];
3378 
3379    anv_cmd_dirty_mask_t                         dynamic_state_mask;
3380    struct anv_dynamic_state                     dynamic_state;
3381 
3382    uint32_t                                     topology;
3383 
3384    struct anv_subpass *                         subpass;
3385 
3386    struct anv_shader_bin *                      shaders[MESA_SHADER_STAGES];
3387 
3388    VkShaderStageFlags                           active_stages;
3389 
3390    bool                                         primitive_restart;
3391    bool                                         writes_depth;
3392    bool                                         depth_test_enable;
3393    bool                                         writes_stencil;
3394    bool                                         stencil_test_enable;
3395    bool                                         depth_clamp_enable;
3396    bool                                         depth_clip_enable;
3397    bool                                         sample_shading_enable;
3398    bool                                         kill_pixel;
3399    bool                                         depth_bounds_test_enable;
3400 
3401    /* When primitive replication is used, subpass->view_mask will describe what
3402     * views to replicate.
3403     */
3404    bool                                         use_primitive_replication;
3405 
3406    struct anv_state                             blend_state;
3407 
3408    uint32_t                                     vb_used;
3409    struct anv_pipeline_vertex_binding {
3410       uint32_t                                  stride;
3411       bool                                      instanced;
3412       uint32_t                                  instance_divisor;
3413    } vb[MAX_VBS];
3414 
3415    struct {
3416       uint32_t                                  sf[7];
3417       uint32_t                                  depth_stencil_state[3];
3418       uint32_t                                  clip[4];
3419    } gen7;
3420 
3421    struct {
3422       uint32_t                                  sf[4];
3423       uint32_t                                  raster[5];
3424       uint32_t                                  wm_depth_stencil[3];
3425    } gen8;
3426 
3427    struct {
3428       uint32_t                                  wm_depth_stencil[4];
3429    } gen9;
3430 };
3431 
3432 struct anv_compute_pipeline {
3433    struct anv_pipeline                          base;
3434 
3435    struct anv_shader_bin *                      cs;
3436    uint32_t                                     cs_right_mask;
3437    uint32_t                                     batch_data[9];
3438    uint32_t                                     interface_descriptor_data[8];
3439 };
3440 
3441 #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
3442    static inline struct anv_##pipe_type##_pipeline *                 \
3443    anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)      \
3444    {                                                                 \
3445       assert(pipeline->type == pipe_enum);                           \
3446       return (struct anv_##pipe_type##_pipeline *) pipeline;         \
3447    }
3448 
ANV_DECL_PIPELINE_DOWNCAST(graphics,ANV_PIPELINE_GRAPHICS)3449 ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
3450 ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
3451 
3452 static inline bool
3453 anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
3454                        gl_shader_stage stage)
3455 {
3456    return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
3457 }
3458 
3459 #define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)             \
3460 static inline const struct brw_##prefix##_prog_data *                   \
3461 get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline)  \
3462 {                                                                       \
3463    if (anv_pipeline_has_stage(pipeline, stage)) {                       \
3464       return (const struct brw_##prefix##_prog_data *)                  \
3465              pipeline->shaders[stage]->prog_data;                       \
3466    } else {                                                             \
3467       return NULL;                                                      \
3468    }                                                                    \
3469 }
3470 
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs,MESA_SHADER_VERTEX)3471 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
3472 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
3473 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
3474 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
3475 ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
3476 
3477 static inline const struct brw_cs_prog_data *
3478 get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
3479 {
3480    assert(pipeline->cs);
3481    return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
3482 }
3483 
3484 static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline * pipeline)3485 anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
3486 {
3487    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
3488       return &get_gs_prog_data(pipeline)->base;
3489    else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
3490       return &get_tes_prog_data(pipeline)->base;
3491    else
3492       return &get_vs_prog_data(pipeline)->base;
3493 }
3494 
3495 VkResult
3496 anv_pipeline_init(struct anv_pipeline *pipeline,
3497                   struct anv_device *device,
3498                   enum anv_pipeline_type type,
3499                   VkPipelineCreateFlags flags,
3500                   const VkAllocationCallbacks *pAllocator);
3501 
3502 void
3503 anv_pipeline_finish(struct anv_pipeline *pipeline,
3504                     struct anv_device *device,
3505                     const VkAllocationCallbacks *pAllocator);
3506 
3507 VkResult
3508 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline, struct anv_device *device,
3509                            struct anv_pipeline_cache *cache,
3510                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
3511                            const VkAllocationCallbacks *alloc);
3512 
3513 VkResult
3514 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
3515                         struct anv_pipeline_cache *cache,
3516                         const VkComputePipelineCreateInfo *info,
3517                         const struct anv_shader_module *module,
3518                         const char *entrypoint,
3519                         const VkSpecializationInfo *spec_info);
3520 
3521 struct anv_cs_parameters {
3522    uint32_t group_size;
3523    uint32_t simd_size;
3524    uint32_t threads;
3525 };
3526 
3527 struct anv_cs_parameters
3528 anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
3529 
3530 struct anv_format_plane {
3531    enum isl_format isl_format:16;
3532    struct isl_swizzle swizzle;
3533 
3534    /* Whether this plane contains chroma channels */
3535    bool has_chroma;
3536 
3537    /* For downscaling of YUV planes */
3538    uint8_t denominator_scales[2];
3539 
3540    /* How to map sampled ycbcr planes to a single 4 component element. */
3541    struct isl_swizzle ycbcr_swizzle;
3542 
3543    /* What aspect is associated to this plane */
3544    VkImageAspectFlags aspect;
3545 };
3546 
3547 
3548 struct anv_format {
3549    struct anv_format_plane planes[3];
3550    VkFormat vk_format;
3551    uint8_t n_planes;
3552    bool can_ycbcr;
3553 };
3554 
3555 /**
3556  * Return the aspect's _format_ plane, not its _memory_ plane (using the
3557  * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
3558  * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
3559  * VK_IMAGE_ASPECT_MEMORY_PLANE_* .
3560  */
3561 static inline uint32_t
anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,VkImageAspectFlags aspect_mask)3562 anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,
3563                           VkImageAspectFlags aspect_mask)
3564 {
3565    switch (aspect_mask) {
3566    case VK_IMAGE_ASPECT_COLOR_BIT:
3567    case VK_IMAGE_ASPECT_DEPTH_BIT:
3568    case VK_IMAGE_ASPECT_PLANE_0_BIT:
3569       return 0;
3570    case VK_IMAGE_ASPECT_STENCIL_BIT:
3571       if ((image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
3572          return 0;
3573       /* Fall-through */
3574    case VK_IMAGE_ASPECT_PLANE_1_BIT:
3575       return 1;
3576    case VK_IMAGE_ASPECT_PLANE_2_BIT:
3577       return 2;
3578    default:
3579       /* Purposefully assert with depth/stencil aspects. */
3580       unreachable("invalid image aspect");
3581    }
3582 }
3583 
3584 static inline VkImageAspectFlags
anv_plane_to_aspect(VkImageAspectFlags image_aspects,uint32_t plane)3585 anv_plane_to_aspect(VkImageAspectFlags image_aspects,
3586                     uint32_t plane)
3587 {
3588    if (image_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
3589       if (util_bitcount(image_aspects) > 1)
3590          return VK_IMAGE_ASPECT_PLANE_0_BIT << plane;
3591       return VK_IMAGE_ASPECT_COLOR_BIT;
3592    }
3593    if (image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
3594       return VK_IMAGE_ASPECT_DEPTH_BIT << plane;
3595    assert(image_aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
3596    return VK_IMAGE_ASPECT_STENCIL_BIT;
3597 }
3598 
3599 #define anv_foreach_image_aspect_bit(b, image, aspects) \
3600    for_each_bit(b, anv_image_expand_aspects(image, aspects))
3601 
3602 const struct anv_format *
3603 anv_get_format(VkFormat format);
3604 
3605 static inline uint32_t
anv_get_format_planes(VkFormat vk_format)3606 anv_get_format_planes(VkFormat vk_format)
3607 {
3608    const struct anv_format *format = anv_get_format(vk_format);
3609 
3610    return format != NULL ? format->n_planes : 0;
3611 }
3612 
3613 struct anv_format_plane
3614 anv_get_format_plane(const struct gen_device_info *devinfo, VkFormat vk_format,
3615                      VkImageAspectFlagBits aspect, VkImageTiling tiling);
3616 
3617 static inline enum isl_format
anv_get_isl_format(const struct gen_device_info * devinfo,VkFormat vk_format,VkImageAspectFlags aspect,VkImageTiling tiling)3618 anv_get_isl_format(const struct gen_device_info *devinfo, VkFormat vk_format,
3619                    VkImageAspectFlags aspect, VkImageTiling tiling)
3620 {
3621    return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format;
3622 }
3623 
3624 bool anv_formats_ccs_e_compatible(const struct gen_device_info *devinfo,
3625                                   VkImageCreateFlags create_flags,
3626                                   VkFormat vk_format,
3627                                   VkImageTiling vk_tiling,
3628                                   const VkImageFormatListCreateInfoKHR *fmt_list);
3629 
3630 static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)3631 anv_swizzle_for_render(struct isl_swizzle swizzle)
3632 {
3633    /* Sometimes the swizzle will have alpha map to one.  We do this to fake
3634     * RGB as RGBA for texturing
3635     */
3636    assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
3637           swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
3638 
3639    /* But it doesn't matter what we render to that channel */
3640    swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
3641 
3642    return swizzle;
3643 }
3644 
3645 void
3646 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
3647 
3648 /**
3649  * Subsurface of an anv_image.
3650  */
3651 struct anv_surface {
3652    /** Valid only if isl_surf::size_B > 0. */
3653    struct isl_surf isl;
3654 
3655    /**
3656     * Offset from VkImage's base address, as bound by vkBindImageMemory().
3657     */
3658    uint32_t offset;
3659 };
3660 
3661 struct anv_image {
3662    struct vk_object_base base;
3663 
3664    VkImageType type; /**< VkImageCreateInfo::imageType */
3665    /* The original VkFormat provided by the client.  This may not match any
3666     * of the actual surface formats.
3667     */
3668    VkFormat vk_format;
3669    const struct anv_format *format;
3670 
3671    VkImageAspectFlags aspects;
3672    VkExtent3D extent;
3673    uint32_t levels;
3674    uint32_t array_size;
3675    uint32_t samples; /**< VkImageCreateInfo::samples */
3676    uint32_t n_planes;
3677    VkImageUsageFlags usage; /**< VkImageCreateInfo::usage. */
3678    VkImageUsageFlags stencil_usage;
3679    VkImageCreateFlags create_flags; /* Flags used when creating image. */
3680    VkImageTiling tiling; /** VkImageCreateInfo::tiling */
3681 
3682    /** True if this is needs to be bound to an appropriately tiled BO.
3683     *
3684     * When not using modifiers, consumers such as X11, Wayland, and KMS need
3685     * the tiling passed via I915_GEM_SET_TILING.  When exporting these buffers
3686     * we require a dedicated allocation so that we can know to allocate a
3687     * tiled buffer.
3688     */
3689    bool needs_set_tiling;
3690 
3691    /**
3692     * Must be DRM_FORMAT_MOD_INVALID unless tiling is
3693     * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
3694     */
3695    uint64_t drm_format_mod;
3696 
3697    VkDeviceSize size;
3698    uint32_t alignment;
3699 
3700    /* Whether the image is made of several underlying buffer objects rather a
3701     * single one with different offsets.
3702     */
3703    bool disjoint;
3704 
3705    /* Image was created with external format. */
3706    bool external_format;
3707 
3708    /**
3709     * Image subsurfaces
3710     *
3711     * For each foo, anv_image::planes[x].surface is valid if and only if
3712     * anv_image::aspects has a x aspect. Refer to anv_image_aspect_to_plane()
3713     * to figure the number associated with a given aspect.
3714     *
3715     * The hardware requires that the depth buffer and stencil buffer be
3716     * separate surfaces.  From Vulkan's perspective, though, depth and stencil
3717     * reside in the same VkImage.  To satisfy both the hardware and Vulkan, we
3718     * allocate the depth and stencil buffers as separate surfaces in the same
3719     * bo.
3720     *
3721     * Memory layout :
3722     *
3723     * -----------------------
3724     * |     surface0        |   /|\
3725     * -----------------------    |
3726     * |   shadow surface0   |    |
3727     * -----------------------    | Plane 0
3728     * |    aux surface0     |    |
3729     * -----------------------    |
3730     * | fast clear colors0  |   \|/
3731     * -----------------------
3732     * |     surface1        |   /|\
3733     * -----------------------    |
3734     * |   shadow surface1   |    |
3735     * -----------------------    | Plane 1
3736     * |    aux surface1     |    |
3737     * -----------------------    |
3738     * | fast clear colors1  |   \|/
3739     * -----------------------
3740     * |        ...          |
3741     * |                     |
3742     * -----------------------
3743     */
3744    struct {
3745       /**
3746        * Offset of the entire plane (whenever the image is disjoint this is
3747        * set to 0).
3748        */
3749       uint32_t offset;
3750 
3751       VkDeviceSize size;
3752       uint32_t alignment;
3753 
3754       struct anv_surface surface;
3755 
3756       /**
3757        * A surface which shadows the main surface and may have different
3758        * tiling. This is used for sampling using a tiling that isn't supported
3759        * for other operations.
3760        */
3761       struct anv_surface shadow_surface;
3762 
3763       /**
3764        * The base aux usage for this image.  For color images, this can be
3765        * either CCS_E or CCS_D depending on whether or not we can reliably
3766        * leave CCS on all the time.
3767        */
3768       enum isl_aux_usage aux_usage;
3769 
3770       struct anv_surface aux_surface;
3771 
3772       /**
3773        * Offset of the fast clear state (used to compute the
3774        * fast_clear_state_offset of the following planes).
3775        */
3776       uint32_t fast_clear_state_offset;
3777 
3778       /**
3779        * BO associated with this plane, set when bound.
3780        */
3781       struct anv_address address;
3782 
3783       /**
3784        * When destroying the image, also free the bo.
3785        * */
3786       bool bo_is_owned;
3787    } planes[3];
3788 };
3789 
3790 /* The ordering of this enum is important */
3791 enum anv_fast_clear_type {
3792    /** Image does not have/support any fast-clear blocks */
3793    ANV_FAST_CLEAR_NONE = 0,
3794    /** Image has/supports fast-clear but only to the default value */
3795    ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
3796    /** Image has/supports fast-clear with an arbitrary fast-clear value */
3797    ANV_FAST_CLEAR_ANY = 2,
3798 };
3799 
3800 /* Returns the number of auxiliary buffer levels attached to an image. */
3801 static inline uint8_t
anv_image_aux_levels(const struct anv_image * const image,VkImageAspectFlagBits aspect)3802 anv_image_aux_levels(const struct anv_image * const image,
3803                      VkImageAspectFlagBits aspect)
3804 {
3805    uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3806    if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
3807       return 0;
3808 
3809    /* The Gen12 CCS aux surface is represented with only one level. */
3810    return image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ?
3811           image->planes[plane].surface.isl.levels :
3812           image->planes[plane].aux_surface.isl.levels;
3813 }
3814 
3815 /* Returns the number of auxiliary buffer layers attached to an image. */
3816 static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,VkImageAspectFlagBits aspect,const uint8_t miplevel)3817 anv_image_aux_layers(const struct anv_image * const image,
3818                      VkImageAspectFlagBits aspect,
3819                      const uint8_t miplevel)
3820 {
3821    assert(image);
3822 
3823    /* The miplevel must exist in the main buffer. */
3824    assert(miplevel < image->levels);
3825 
3826    if (miplevel >= anv_image_aux_levels(image, aspect)) {
3827       /* There are no layers with auxiliary data because the miplevel has no
3828        * auxiliary data.
3829        */
3830       return 0;
3831    } else {
3832       uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3833 
3834       /* The Gen12 CCS aux surface is represented with only one layer. */
3835       const struct isl_extent4d *aux_logical_level0_px =
3836          image->planes[plane].aux_surface.isl.tiling == ISL_TILING_GEN12_CCS ?
3837          &image->planes[plane].surface.isl.logical_level0_px :
3838          &image->planes[plane].aux_surface.isl.logical_level0_px;
3839 
3840       return MAX2(aux_logical_level0_px->array_len,
3841                   aux_logical_level0_px->depth >> miplevel);
3842    }
3843 }
3844 
3845 static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)3846 anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
3847                                const struct anv_image *image,
3848                                VkImageAspectFlagBits aspect)
3849 {
3850    assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
3851 
3852    uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3853    return anv_address_add(image->planes[plane].address,
3854                           image->planes[plane].fast_clear_state_offset);
3855 }
3856 
3857 static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect)3858 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
3859                                    const struct anv_image *image,
3860                                    VkImageAspectFlagBits aspect)
3861 {
3862    struct anv_address addr =
3863       anv_image_get_clear_color_addr(device, image, aspect);
3864 
3865    const unsigned clear_color_state_size = device->info.gen >= 10 ?
3866       device->isl_dev.ss.clear_color_state_size :
3867       device->isl_dev.ss.clear_value_size;
3868    return anv_address_add(addr, clear_color_state_size);
3869 }
3870 
3871 static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlagBits aspect,uint32_t level,uint32_t array_layer)3872 anv_image_get_compression_state_addr(const struct anv_device *device,
3873                                      const struct anv_image *image,
3874                                      VkImageAspectFlagBits aspect,
3875                                      uint32_t level, uint32_t array_layer)
3876 {
3877    assert(level < anv_image_aux_levels(image, aspect));
3878    assert(array_layer < anv_image_aux_layers(image, aspect, level));
3879    UNUSED uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3880    assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
3881 
3882    struct anv_address addr =
3883       anv_image_get_fast_clear_type_addr(device, image, aspect);
3884    addr.offset += 4; /* Go past the fast clear type */
3885 
3886    if (image->type == VK_IMAGE_TYPE_3D) {
3887       for (uint32_t l = 0; l < level; l++)
3888          addr.offset += anv_minify(image->extent.depth, l) * 4;
3889    } else {
3890       addr.offset += level * image->array_size * 4;
3891    }
3892    addr.offset += array_layer * 4;
3893 
3894    assert(addr.offset <
3895           image->planes[plane].address.offset + image->planes[plane].size);
3896    return addr;
3897 }
3898 
3899 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
3900 static inline bool
anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,const struct anv_image * image)3901 anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
3902                         const struct anv_image *image)
3903 {
3904    if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
3905       return false;
3906 
3907    /* For Gen8-11, there are some restrictions around sampling from HiZ.
3908     * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
3909     * say:
3910     *
3911     *    "If this field is set to AUX_HIZ, Number of Multisamples must
3912     *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
3913     */
3914    if (image->type == VK_IMAGE_TYPE_3D)
3915       return false;
3916 
3917    /* Allow this feature on BDW even though it is disabled in the BDW devinfo
3918     * struct. There's documentation which suggests that this feature actually
3919     * reduces performance on BDW, but it has only been observed to help so
3920     * far. Sampling fast-cleared blocks on BDW must also be handled with care
3921     * (see depth_stencil_attachment_compute_aux_usage() for more info).
3922     */
3923    if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz)
3924       return false;
3925 
3926    return image->samples == 1;
3927 }
3928 
3929 static inline bool
anv_image_plane_uses_aux_map(const struct anv_device * device,const struct anv_image * image,uint32_t plane)3930 anv_image_plane_uses_aux_map(const struct anv_device *device,
3931                              const struct anv_image *image,
3932                              uint32_t plane)
3933 {
3934    return device->info.has_aux_map &&
3935       isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
3936 }
3937 
3938 void
3939 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
3940                                   const struct anv_image *image,
3941                                   VkImageAspectFlagBits aspect,
3942                                   enum isl_aux_usage aux_usage,
3943                                   uint32_t level,
3944                                   uint32_t base_layer,
3945                                   uint32_t layer_count);
3946 
3947 void
3948 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
3949                       const struct anv_image *image,
3950                       VkImageAspectFlagBits aspect,
3951                       enum isl_aux_usage aux_usage,
3952                       enum isl_format format, struct isl_swizzle swizzle,
3953                       uint32_t level, uint32_t base_layer, uint32_t layer_count,
3954                       VkRect2D area, union isl_color_value clear_color);
3955 void
3956 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3957                               const struct anv_image *image,
3958                               VkImageAspectFlags aspects,
3959                               enum isl_aux_usage depth_aux_usage,
3960                               uint32_t level,
3961                               uint32_t base_layer, uint32_t layer_count,
3962                               VkRect2D area,
3963                               float depth_value, uint8_t stencil_value);
3964 void
3965 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
3966                        const struct anv_image *src_image,
3967                        enum isl_aux_usage src_aux_usage,
3968                        uint32_t src_level, uint32_t src_base_layer,
3969                        const struct anv_image *dst_image,
3970                        enum isl_aux_usage dst_aux_usage,
3971                        uint32_t dst_level, uint32_t dst_base_layer,
3972                        VkImageAspectFlagBits aspect,
3973                        uint32_t src_x, uint32_t src_y,
3974                        uint32_t dst_x, uint32_t dst_y,
3975                        uint32_t width, uint32_t height,
3976                        uint32_t layer_count,
3977                        enum blorp_filter filter);
3978 void
3979 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
3980                  const struct anv_image *image,
3981                  VkImageAspectFlagBits aspect, uint32_t level,
3982                  uint32_t base_layer, uint32_t layer_count,
3983                  enum isl_aux_op hiz_op);
3984 void
3985 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
3986                     const struct anv_image *image,
3987                     VkImageAspectFlags aspects,
3988                     uint32_t level,
3989                     uint32_t base_layer, uint32_t layer_count,
3990                     VkRect2D area, uint8_t stencil_value);
3991 void
3992 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
3993                  const struct anv_image *image,
3994                  enum isl_format format, struct isl_swizzle swizzle,
3995                  VkImageAspectFlagBits aspect,
3996                  uint32_t base_layer, uint32_t layer_count,
3997                  enum isl_aux_op mcs_op, union isl_color_value *clear_value,
3998                  bool predicate);
3999 void
4000 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
4001                  const struct anv_image *image,
4002                  enum isl_format format, struct isl_swizzle swizzle,
4003                  VkImageAspectFlagBits aspect, uint32_t level,
4004                  uint32_t base_layer, uint32_t layer_count,
4005                  enum isl_aux_op ccs_op, union isl_color_value *clear_value,
4006                  bool predicate);
4007 
4008 void
4009 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
4010                          const struct anv_image *image,
4011                          VkImageAspectFlagBits aspect,
4012                          uint32_t base_level, uint32_t level_count,
4013                          uint32_t base_layer, uint32_t layer_count);
4014 
4015 enum isl_aux_state
4016 anv_layout_to_aux_state(const struct gen_device_info * const devinfo,
4017                         const struct anv_image *image,
4018                         const VkImageAspectFlagBits aspect,
4019                         const VkImageLayout layout);
4020 
4021 enum isl_aux_usage
4022 anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
4023                         const struct anv_image *image,
4024                         const VkImageAspectFlagBits aspect,
4025                         const VkImageUsageFlagBits usage,
4026                         const VkImageLayout layout);
4027 
4028 enum anv_fast_clear_type
4029 anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
4030                               const struct anv_image * const image,
4031                               const VkImageAspectFlagBits aspect,
4032                               const VkImageLayout layout);
4033 
4034 /* This is defined as a macro so that it works for both
4035  * VkImageSubresourceRange and VkImageSubresourceLayers
4036  */
4037 #define anv_get_layerCount(_image, _range) \
4038    ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
4039     (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
4040 
4041 static inline uint32_t
anv_get_levelCount(const struct anv_image * image,const VkImageSubresourceRange * range)4042 anv_get_levelCount(const struct anv_image *image,
4043                    const VkImageSubresourceRange *range)
4044 {
4045    return range->levelCount == VK_REMAINING_MIP_LEVELS ?
4046           image->levels - range->baseMipLevel : range->levelCount;
4047 }
4048 
4049 static inline VkImageAspectFlags
anv_image_expand_aspects(const struct anv_image * image,VkImageAspectFlags aspects)4050 anv_image_expand_aspects(const struct anv_image *image,
4051                          VkImageAspectFlags aspects)
4052 {
4053    /* If the underlying image has color plane aspects and
4054     * VK_IMAGE_ASPECT_COLOR_BIT has been requested, then return the aspects of
4055     * the underlying image. */
4056    if ((image->aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) != 0 &&
4057        aspects == VK_IMAGE_ASPECT_COLOR_BIT)
4058       return image->aspects;
4059 
4060    return aspects;
4061 }
4062 
4063 static inline bool
anv_image_aspects_compatible(VkImageAspectFlags aspects1,VkImageAspectFlags aspects2)4064 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
4065                              VkImageAspectFlags aspects2)
4066 {
4067    if (aspects1 == aspects2)
4068       return true;
4069 
4070    /* Only 1 color aspects are compatibles. */
4071    if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4072        (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
4073        util_bitcount(aspects1) == util_bitcount(aspects2))
4074       return true;
4075 
4076    return false;
4077 }
4078 
4079 struct anv_image_view {
4080    struct vk_object_base base;
4081 
4082    const struct anv_image *image; /**< VkImageViewCreateInfo::image */
4083 
4084    VkImageAspectFlags aspect_mask;
4085    VkFormat vk_format;
4086    VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
4087 
4088    unsigned n_planes;
4089    struct {
4090       uint32_t image_plane;
4091 
4092       struct isl_view isl;
4093 
4094       /**
4095        * RENDER_SURFACE_STATE when using image as a sampler surface with an
4096        * image layout of SHADER_READ_ONLY_OPTIMAL or
4097        * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
4098        */
4099       struct anv_surface_state optimal_sampler_surface_state;
4100 
4101       /**
4102        * RENDER_SURFACE_STATE when using image as a sampler surface with an
4103        * image layout of GENERAL.
4104        */
4105       struct anv_surface_state general_sampler_surface_state;
4106 
4107       /**
4108        * RENDER_SURFACE_STATE when using image as a storage image. Separate
4109        * states for write-only and readable, using the real format for
4110        * write-only and the lowered format for readable.
4111        */
4112       struct anv_surface_state storage_surface_state;
4113       struct anv_surface_state writeonly_storage_surface_state;
4114 
4115       struct brw_image_param storage_image_param;
4116    } planes[3];
4117 };
4118 
4119 enum anv_image_view_state_flags {
4120    ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY   = (1 << 0),
4121    ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL      = (1 << 1),
4122 };
4123 
4124 void anv_image_fill_surface_state(struct anv_device *device,
4125                                   const struct anv_image *image,
4126                                   VkImageAspectFlagBits aspect,
4127                                   const struct isl_view *view,
4128                                   isl_surf_usage_flags_t view_usage,
4129                                   enum isl_aux_usage aux_usage,
4130                                   const union isl_color_value *clear_color,
4131                                   enum anv_image_view_state_flags flags,
4132                                   struct anv_surface_state *state_inout,
4133                                   struct brw_image_param *image_param_out);
4134 
4135 struct anv_image_create_info {
4136    const VkImageCreateInfo *vk_info;
4137 
4138    /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
4139    isl_tiling_flags_t isl_tiling_flags;
4140 
4141    /** These flags will be added to any derived from VkImageCreateInfo. */
4142    isl_surf_usage_flags_t isl_extra_usage_flags;
4143 
4144    uint32_t stride;
4145    bool external_format;
4146 };
4147 
4148 VkResult anv_image_create(VkDevice _device,
4149                           const struct anv_image_create_info *info,
4150                           const VkAllocationCallbacks* alloc,
4151                           VkImage *pImage);
4152 
4153 enum isl_format
4154 anv_isl_format_for_descriptor_type(VkDescriptorType type);
4155 
4156 static inline VkExtent3D
anv_sanitize_image_extent(const VkImageType imageType,const VkExtent3D imageExtent)4157 anv_sanitize_image_extent(const VkImageType imageType,
4158                           const VkExtent3D imageExtent)
4159 {
4160    switch (imageType) {
4161    case VK_IMAGE_TYPE_1D:
4162       return (VkExtent3D) { imageExtent.width, 1, 1 };
4163    case VK_IMAGE_TYPE_2D:
4164       return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
4165    case VK_IMAGE_TYPE_3D:
4166       return imageExtent;
4167    default:
4168       unreachable("invalid image type");
4169    }
4170 }
4171 
4172 static inline VkOffset3D
anv_sanitize_image_offset(const VkImageType imageType,const VkOffset3D imageOffset)4173 anv_sanitize_image_offset(const VkImageType imageType,
4174                           const VkOffset3D imageOffset)
4175 {
4176    switch (imageType) {
4177    case VK_IMAGE_TYPE_1D:
4178       return (VkOffset3D) { imageOffset.x, 0, 0 };
4179    case VK_IMAGE_TYPE_2D:
4180       return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
4181    case VK_IMAGE_TYPE_3D:
4182       return imageOffset;
4183    default:
4184       unreachable("invalid image type");
4185    }
4186 }
4187 
4188 VkFormatFeatureFlags
4189 anv_get_image_format_features(const struct gen_device_info *devinfo,
4190                               VkFormat vk_format,
4191                               const struct anv_format *anv_format,
4192                               VkImageTiling vk_tiling);
4193 
4194 void anv_fill_buffer_surface_state(struct anv_device *device,
4195                                    struct anv_state state,
4196                                    enum isl_format format,
4197                                    struct anv_address address,
4198                                    uint32_t range, uint32_t stride);
4199 
4200 static inline void
anv_clear_color_from_att_state(union isl_color_value * clear_color,const struct anv_attachment_state * att_state,const struct anv_image_view * iview)4201 anv_clear_color_from_att_state(union isl_color_value *clear_color,
4202                                const struct anv_attachment_state *att_state,
4203                                const struct anv_image_view *iview)
4204 {
4205    const struct isl_format_layout *view_fmtl =
4206       isl_format_get_layout(iview->planes[0].isl.format);
4207 
4208 #define COPY_CLEAR_COLOR_CHANNEL(c, i) \
4209    if (view_fmtl->channels.c.bits) \
4210       clear_color->u32[i] = att_state->clear_value.color.uint32[i]
4211 
4212    COPY_CLEAR_COLOR_CHANNEL(r, 0);
4213    COPY_CLEAR_COLOR_CHANNEL(g, 1);
4214    COPY_CLEAR_COLOR_CHANNEL(b, 2);
4215    COPY_CLEAR_COLOR_CHANNEL(a, 3);
4216 
4217 #undef COPY_CLEAR_COLOR_CHANNEL
4218 }
4219 
4220 
4221 /* Haswell border color is a bit of a disaster.  Float and unorm formats use a
4222  * straightforward 32-bit float color in the first 64 bytes.  Instead of using
4223  * a nice float/integer union like Gen8+, Haswell specifies the integer border
4224  * color as a separate entry /after/ the float color.  The layout of this entry
4225  * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
4226  *
4227  * Since we don't know the format/bpp, we can't make any of the border colors
4228  * containing '1' work for all formats, as it would be in the wrong place for
4229  * some of them.  We opt to make 32-bit integers work as this seems like the
4230  * most common option.  Fortunately, transparent black works regardless, as
4231  * all zeroes is the same in every bit-size.
4232  */
4233 struct hsw_border_color {
4234    float float32[4];
4235    uint32_t _pad0[12];
4236    uint32_t uint32[4];
4237    uint32_t _pad1[108];
4238 };
4239 
4240 struct gen8_border_color {
4241    union {
4242       float float32[4];
4243       uint32_t uint32[4];
4244    };
4245    /* Pad out to 64 bytes */
4246    uint32_t _pad[12];
4247 };
4248 
4249 struct anv_ycbcr_conversion {
4250    struct vk_object_base base;
4251 
4252    const struct anv_format *        format;
4253    VkSamplerYcbcrModelConversion    ycbcr_model;
4254    VkSamplerYcbcrRange              ycbcr_range;
4255    VkComponentSwizzle               mapping[4];
4256    VkChromaLocation                 chroma_offsets[2];
4257    VkFilter                         chroma_filter;
4258    bool                             chroma_reconstruction;
4259 };
4260 
4261 struct anv_sampler {
4262    struct vk_object_base        base;
4263 
4264    uint32_t                     state[3][4];
4265    uint32_t                     n_planes;
4266    struct anv_ycbcr_conversion *conversion;
4267 
4268    /* Blob of sampler state data which is guaranteed to be 32-byte aligned
4269     * and with a 32-byte stride for use as bindless samplers.
4270     */
4271    struct anv_state             bindless_state;
4272 
4273    struct anv_state             custom_border_color;
4274 };
4275 
4276 struct anv_framebuffer {
4277    struct vk_object_base                        base;
4278 
4279    uint32_t                                     width;
4280    uint32_t                                     height;
4281    uint32_t                                     layers;
4282 
4283    uint32_t                                     attachment_count;
4284    struct anv_image_view *                      attachments[0];
4285 };
4286 
4287 struct anv_subpass_attachment {
4288    VkImageUsageFlagBits usage;
4289    uint32_t attachment;
4290    VkImageLayout layout;
4291 
4292    /* Used only with attachment containing stencil data. */
4293    VkImageLayout stencil_layout;
4294 };
4295 
4296 struct anv_subpass {
4297    uint32_t                                     attachment_count;
4298 
4299    /**
4300     * A pointer to all attachment references used in this subpass.
4301     * Only valid if ::attachment_count > 0.
4302     */
4303    struct anv_subpass_attachment *              attachments;
4304    uint32_t                                     input_count;
4305    struct anv_subpass_attachment *              input_attachments;
4306    uint32_t                                     color_count;
4307    struct anv_subpass_attachment *              color_attachments;
4308    struct anv_subpass_attachment *              resolve_attachments;
4309 
4310    struct anv_subpass_attachment *              depth_stencil_attachment;
4311    struct anv_subpass_attachment *              ds_resolve_attachment;
4312    VkResolveModeFlagBitsKHR                     depth_resolve_mode;
4313    VkResolveModeFlagBitsKHR                     stencil_resolve_mode;
4314 
4315    uint32_t                                     view_mask;
4316 
4317    /** Subpass has a depth/stencil self-dependency */
4318    bool                                         has_ds_self_dep;
4319 
4320    /** Subpass has at least one color resolve attachment */
4321    bool                                         has_color_resolve;
4322 };
4323 
4324 static inline unsigned
anv_subpass_view_count(const struct anv_subpass * subpass)4325 anv_subpass_view_count(const struct anv_subpass *subpass)
4326 {
4327    return MAX2(1, util_bitcount(subpass->view_mask));
4328 }
4329 
4330 struct anv_render_pass_attachment {
4331    /* TODO: Consider using VkAttachmentDescription instead of storing each of
4332     * its members individually.
4333     */
4334    VkFormat                                     format;
4335    uint32_t                                     samples;
4336    VkImageUsageFlags                            usage;
4337    VkAttachmentLoadOp                           load_op;
4338    VkAttachmentStoreOp                          store_op;
4339    VkAttachmentLoadOp                           stencil_load_op;
4340    VkImageLayout                                initial_layout;
4341    VkImageLayout                                final_layout;
4342    VkImageLayout                                first_subpass_layout;
4343 
4344    VkImageLayout                                stencil_initial_layout;
4345    VkImageLayout                                stencil_final_layout;
4346 
4347    /* The subpass id in which the attachment will be used last. */
4348    uint32_t                                     last_subpass_idx;
4349 };
4350 
4351 struct anv_render_pass {
4352    struct vk_object_base                        base;
4353 
4354    uint32_t                                     attachment_count;
4355    uint32_t                                     subpass_count;
4356    /* An array of subpass_count+1 flushes, one per subpass boundary */
4357    enum anv_pipe_bits *                         subpass_flushes;
4358    struct anv_render_pass_attachment *          attachments;
4359    struct anv_subpass                           subpasses[0];
4360 };
4361 
4362 #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
4363 
4364 #define OA_SNAPSHOT_SIZE (256)
4365 #define ANV_KHR_PERF_QUERY_SIZE (ALIGN(sizeof(uint64_t), 64) + 2 * OA_SNAPSHOT_SIZE)
4366 
4367 struct anv_query_pool {
4368    struct vk_object_base                        base;
4369 
4370    VkQueryType                                  type;
4371    VkQueryPipelineStatisticFlags                pipeline_statistics;
4372    /** Stride between slots, in bytes */
4373    uint32_t                                     stride;
4374    /** Number of slots in this query pool */
4375    uint32_t                                     slots;
4376    struct anv_bo *                              bo;
4377 
4378    /* Perf queries : */
4379    struct anv_bo                                reset_bo;
4380    uint32_t                                     n_counters;
4381    struct gen_perf_counter_pass                *counter_pass;
4382    uint32_t                                     n_passes;
4383    struct gen_perf_query_info                 **pass_query;
4384 };
4385 
khr_perf_query_preamble_offset(struct anv_query_pool * pool,uint32_t pass)4386 static inline uint32_t khr_perf_query_preamble_offset(struct anv_query_pool *pool,
4387                                                       uint32_t pass)
4388 {
4389    return pass * ANV_KHR_PERF_QUERY_SIZE + 8;
4390 }
4391 
4392 int anv_get_instance_entrypoint_index(const char *name);
4393 int anv_get_device_entrypoint_index(const char *name);
4394 int anv_get_physical_device_entrypoint_index(const char *name);
4395 
4396 const char *anv_get_instance_entry_name(int index);
4397 const char *anv_get_physical_device_entry_name(int index);
4398 const char *anv_get_device_entry_name(int index);
4399 
4400 bool
4401 anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
4402                                    const struct anv_instance_extension_table *instance);
4403 bool
4404 anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
4405                                           const struct anv_instance_extension_table *instance);
4406 bool
4407 anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
4408                                  const struct anv_instance_extension_table *instance,
4409                                  const struct anv_device_extension_table *device);
4410 
4411 void *anv_resolve_device_entrypoint(const struct gen_device_info *devinfo,
4412                                     uint32_t index);
4413 void *anv_lookup_entrypoint(const struct gen_device_info *devinfo,
4414                             const char *name);
4415 
4416 void anv_dump_image_to_ppm(struct anv_device *device,
4417                            struct anv_image *image, unsigned miplevel,
4418                            unsigned array_layer, VkImageAspectFlagBits aspect,
4419                            const char *filename);
4420 
4421 enum anv_dump_action {
4422    ANV_DUMP_FRAMEBUFFERS_BIT = 0x1,
4423 };
4424 
4425 void anv_dump_start(struct anv_device *device, enum anv_dump_action actions);
4426 void anv_dump_finish(void);
4427 
4428 void anv_dump_add_attachments(struct anv_cmd_buffer *cmd_buffer);
4429 
4430 static inline uint32_t
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)4431 anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
4432 {
4433    /* This function must be called from within a subpass. */
4434    assert(cmd_state->pass && cmd_state->subpass);
4435 
4436    const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;
4437 
4438    /* The id of this subpass shouldn't exceed the number of subpasses in this
4439     * render pass minus 1.
4440     */
4441    assert(subpass_id < cmd_state->pass->subpass_count);
4442    return subpass_id;
4443 }
4444 
4445 struct anv_performance_configuration_intel {
4446    struct vk_object_base      base;
4447 
4448    struct gen_perf_registers *register_config;
4449 
4450    uint64_t                   config_id;
4451 };
4452 
4453 struct gen_perf_config *anv_get_perf(const struct gen_device_info *devinfo, int fd);
4454 void anv_device_perf_init(struct anv_device *device);
4455 void anv_perf_write_pass_results(struct gen_perf_config *perf,
4456                                  struct anv_query_pool *pool, uint32_t pass,
4457                                  const struct gen_perf_query_result *accumulated_results,
4458                                  union VkPerformanceCounterResultKHR *results);
4459 
4460 #define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
4461    VK_FROM_HANDLE(__anv_type, __name, __handle)
4462 
4463 VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, base, VkCommandBuffer,
4464                        VK_OBJECT_TYPE_COMMAND_BUFFER)
4465 VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
4466 VK_DEFINE_HANDLE_CASTS(anv_instance, base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
4467 VK_DEFINE_HANDLE_CASTS(anv_physical_device, base, VkPhysicalDevice,
4468                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
4469 VK_DEFINE_HANDLE_CASTS(anv_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE)
4470 
4471 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool,
4472                                VK_OBJECT_TYPE_COMMAND_POOL)
4473 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer,
4474                                VK_OBJECT_TYPE_BUFFER)
4475 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
4476                                VK_OBJECT_TYPE_BUFFER_VIEW)
4477 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
4478                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
4479 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
4480                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
4481 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
4482                                VkDescriptorSetLayout,
4483                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
4484 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
4485                                VkDescriptorUpdateTemplate,
4486                                VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
4487 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
4488                                VK_OBJECT_TYPE_DEVICE_MEMORY)
4489 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
4490 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
4491 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
4492                                VK_OBJECT_TYPE_FRAMEBUFFER)
4493 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
4494 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, base, VkImageView,
4495                                VK_OBJECT_TYPE_IMAGE_VIEW);
4496 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
4497                                VK_OBJECT_TYPE_PIPELINE_CACHE)
4498 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
4499                                VK_OBJECT_TYPE_PIPELINE)
4500 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
4501                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
4502 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
4503                                VK_OBJECT_TYPE_QUERY_POOL)
4504 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
4505                                VK_OBJECT_TYPE_RENDER_PASS)
4506 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
4507                                VK_OBJECT_TYPE_SAMPLER)
4508 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
4509                                VK_OBJECT_TYPE_SEMAPHORE)
4510 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, base, VkShaderModule,
4511                                VK_OBJECT_TYPE_SHADER_MODULE)
4512 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
4513                                VkSamplerYcbcrConversion,
4514                                VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
4515 VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
4516                                VkPerformanceConfigurationINTEL,
4517                                VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)
4518 
4519 /* Gen-specific function declarations */
4520 #ifdef genX
4521 #  include "anv_genX.h"
4522 #else
4523 #  define genX(x) gen7_##x
4524 #  include "anv_genX.h"
4525 #  undef genX
4526 #  define genX(x) gen75_##x
4527 #  include "anv_genX.h"
4528 #  undef genX
4529 #  define genX(x) gen8_##x
4530 #  include "anv_genX.h"
4531 #  undef genX
4532 #  define genX(x) gen9_##x
4533 #  include "anv_genX.h"
4534 #  undef genX
4535 #  define genX(x) gen10_##x
4536 #  include "anv_genX.h"
4537 #  undef genX
4538 #  define genX(x) gen11_##x
4539 #  include "anv_genX.h"
4540 #  undef genX
4541 #  define genX(x) gen12_##x
4542 #  include "anv_genX.h"
4543 #  undef genX
4544 #endif
4545 
4546 #endif /* ANV_PRIVATE_H */
4547