1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #ifndef RADV_PRIVATE_H
29 #define RADV_PRIVATE_H
30 
31 #include <assert.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #ifdef HAVE_VALGRIND
38 #include <memcheck.h>
39 #include <valgrind.h>
40 #define VG(x) x
41 #else
42 #define VG(x) ((void)0)
43 #endif
44 
45 #include "c11/threads.h"
46 #ifndef _WIN32
47 #include <amdgpu.h>
48 #include <xf86drm.h>
49 #endif
50 #include "compiler/shader_enums.h"
51 #include "util/bitscan.h"
52 #include "util/cnd_monotonic.h"
53 #include "util/list.h"
54 #include "util/macros.h"
55 #include "util/rwlock.h"
56 #include "util/xmlconfig.h"
57 #include "vk_alloc.h"
58 #include "vk_command_buffer.h"
59 #include "vk_command_pool.h"
60 #include "vk_debug_report.h"
61 #include "vk_device.h"
62 #include "vk_format.h"
63 #include "vk_instance.h"
64 #include "vk_log.h"
65 #include "vk_physical_device.h"
66 #include "vk_shader_module.h"
67 #include "vk_queue.h"
68 #include "vk_util.h"
69 #include "vk_image.h"
70 
71 #include "ac_binary.h"
72 #include "ac_gpu_info.h"
73 #include "ac_shader_util.h"
74 #include "ac_spm.h"
75 #include "ac_sqtt.h"
76 #include "ac_surface.h"
77 #include "radv_constants.h"
78 #include "radv_descriptor_set.h"
79 #include "radv_radeon_winsys.h"
80 #include "radv_shader.h"
81 #include "sid.h"
82 
83 /* Pre-declarations needed for WSI entrypoints */
84 struct wl_surface;
85 struct wl_display;
86 typedef struct xcb_connection_t xcb_connection_t;
87 typedef uint32_t xcb_visualid_t;
88 typedef uint32_t xcb_window_t;
89 
90 #include <vulkan/vk_android_native_buffer.h>
91 #include <vulkan/vk_icd.h>
92 #include <vulkan/vulkan.h>
93 #include <vulkan/vulkan_android.h>
94 
95 #include "radv_entrypoints.h"
96 
97 #include "wsi_common.h"
98 
99 #ifdef __cplusplus
100 extern "C"
101 {
102 #endif
103 
104 /* Helper to determine if we should compile
105  * any of the Android AHB support.
106  *
107  * To actually enable the ext we also need
108  * the necessary kernel support.
109  */
110 #if defined(ANDROID) && ANDROID_API_LEVEL >= 26
111 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
112 #include <vndk/hardware_buffer.h>
113 #else
114 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
115 #endif
116 
117 #ifdef _WIN32
118 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
119 #else
120 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
121 #endif
122 
123 #ifdef _WIN32
124 #define radv_printflike(a, b)
125 #else
126 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
127 #endif
128 
/* Round v up to the next multiple of a. `a` must be a power of two. */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   const uint32_t mask = a - 1;
   assert(a != 0 && (a & mask) == 0);
   return (v + mask) & ~mask;
}
135 
/* Round v up to the next multiple of a, where a may be any non-zero
 * value (not necessarily a power of two).
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   const uint32_t biased = v + a - 1;
   return biased - biased % a;
}
141 
/* 64-bit variant of align_u32(): round v up to the next multiple of a.
 * `a` must be a power of two.
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   const uint64_t mask = a - 1;
   assert(a != 0 && (a & mask) == 0);
   return (v + mask) & ~mask;
}
148 
/* Signed variant of align_u32(): round v up (towards +infinity) to the
 * next multiple of a. `a` must be a positive power of two.
 */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   const int32_t mask = a - 1;
   assert(a != 0 && (a & mask) == 0);
   return (v + mask) & ~mask;
}
155 
/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   /* For a power-of-two alignment, the low bits are the remainder. */
   const uintmax_t remainder = n & (a - 1);
   return remainder == 0;
}
163 
/* Divide v by a, rounding the quotient up. */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
   const uint32_t biased = v + (a - 1);
   return biased / a;
}
169 
/* 64-bit variant of round_up_u32(): divide v by a, rounding up. */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
   const uint64_t biased = v + (a - 1);
   return biased / a;
}
175 
/* Extent of mip level `levels` for a base extent of n: halve `levels`
 * times, never going below 1 for a non-zero input. A zero extent stays
 * zero.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (n == 0)
      return 0;

   const uint32_t shifted = n >> levels;
   return shifted > 0 ? shifted : 1;
}
/* Clamp f into [min, max]. Requires min < max. NaN passes through
 * unchanged (both comparisons are false for NaN).
 */
static inline float
radv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   if (f < min)
      return min;
   if (f > max)
      return max;
   return f;
}
196 
/* Clear the bits of `clear_mask` from `*inout_mask` and report whether
 * any of them were actually set.
 */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   const bool any_set = (*inout_mask & clear_mask) != 0;
   *inout_mask &= ~clear_mask;
   return any_set;
}
207 
/* Convert a float to signed fixed point with `frac_bits` fractional
 * bits (truncating towards zero on the final conversion).
 */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   const float scale = (float)(1 << frac_bits);
   return (int)(value * scale);
}
213 
/* Convert a float to unsigned fixed point with `frac_bits` fractional
 * bits (truncating on the final conversion).
 */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   const float scale = (float)(1 << frac_bits);
   return (unsigned int)(value * scale);
}
219 
220 /* Whenever we generate an error, pass it through this function. Useful for
221  * debugging, where we can break on it. Only call at error site, not when
222  * propagating errors. Might be useful to plug in a stack trace here.
223  */
224 
225 struct radv_image_view;
226 struct radv_instance;
227 
228 void radv_loge(const char *format, ...) radv_printflike(1, 2);
229 void radv_loge_v(const char *format, va_list va);
230 void radv_logi(const char *format, ...) radv_printflike(1, 2);
231 void radv_logi_v(const char *format, va_list va);
232 
233 /* A non-fatal assert.  Useful for debugging. */
234 #ifdef NDEBUG
235 #define radv_assert(x)                                                                             \
236    do {                                                                                            \
237    } while (0)
238 #else
239 #define radv_assert(x)                                                                             \
240    do {                                                                                            \
241       if (unlikely(!(x)))                                                                          \
242          fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x);                            \
243    } while (0)
244 #endif
245 
246 int radv_get_instance_entrypoint_index(const char *name);
247 int radv_get_device_entrypoint_index(const char *name);
248 int radv_get_physical_device_entrypoint_index(const char *name);
249 
250 const char *radv_get_instance_entry_name(int index);
251 const char *radv_get_physical_device_entry_name(int index);
252 const char *radv_get_device_entry_name(int index);
253 
/* Queue families exposed by the driver. */
enum radv_queue_family {
   RADV_QUEUE_GENERAL,
   RADV_QUEUE_COMPUTE,
   RADV_QUEUE_TRANSFER,
   RADV_MAX_QUEUE_FAMILIES,
   /* Marker for foreign-queue ownership transfers; not an actual driver
    * queue family (aliases the count above). */
   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
};
262 
/* Per-GPU state backing VkPhysicalDevice. */
struct radv_physical_device {
   struct vk_physical_device vk;

   /* Link in radv_instance::physical_devices */
   struct list_head link;

   struct radv_instance *instance;

   /* Winsys handle and chip information for this GPU. */
   struct radeon_winsys *ws;
   struct radeon_info rad_info;
   char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   /* DRM file descriptors — NOTE(review): presumably render node and
    * optional primary/master node; confirm -1 convention for master_fd. */
   int local_fd;
   int master_fd;
   struct wsi_device wsi_device;

   bool out_of_order_rast_allowed;

   /* Whether DCC should be enabled for MSAA textures. */
   bool dcc_msaa_allowed;

   /* Whether to enable NGG. */
   bool use_ngg;

   /* Whether to enable NGG culling. */
   bool use_ngg_culling;

   /* Whether to enable NGG streamout. */
   bool use_ngg_streamout;

   /* Number of threads per wave. */
   uint8_t ps_wave_size;
   uint8_t cs_wave_size;
   uint8_t ge_wave_size;
   uint8_t rt_wave_size;

   /* Whether to use the LLVM compiler backend */
   bool use_llvm;

   /* Whether to emulate ETC2 image support on HW without support. */
   bool emulate_etc2;

   /* This is the drivers on-disk cache used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
   struct disk_cache *disk_cache;

   /* Memory types/heaps; memory_domains/memory_flags are indexed by
    * Vulkan memory type index. */
   VkPhysicalDeviceMemoryProperties memory_properties;
   enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
   enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
   unsigned heaps;

#ifndef _WIN32
   int available_nodes;
   drmPciBusInfo bus_info;

   dev_t primary_devid;
   dev_t render_devid;
#endif

   nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];

   /* Maps Vulkan queue family index -> radv queue family; see
    * vk_queue_to_radv(). */
   enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
   uint32_t num_queues;
};
331 
/* Driver state backing VkInstance. */
struct radv_instance {
   struct vk_instance vk;

   VkAllocationCallbacks alloc;

   /* Debug/perftest feature bitmasks — NOTE(review): presumably mirror
    * the RADV_DEBUG / RADV_PERFTEST environment variables; confirm. */
   uint64_t debug_flags;
   uint64_t perftest_flags;

   /* Physical devices are enumerated lazily; `physical_devices` holds
    * radv_physical_device::link entries. */
   bool physical_devices_enumerated;
   struct list_head physical_devices;

   /* driconf option caches (per-application driver options). */
   struct driOptionCache dri_options;
   struct driOptionCache available_dri_options;

   /**
    * Workarounds for game bugs.
    */
   bool enable_mrt_output_nan_fixup;
   bool disable_tc_compat_htile_in_general;
   bool disable_shrink_image_store;
   bool absolute_depth_bias;
   bool report_apu_as_dgpu;
   bool disable_htile_layers;
   bool disable_aniso_single_level;
   bool zero_vram;
};
358 
359 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
360 void radv_finish_wsi(struct radv_physical_device *physical_device);
361 
362 struct cache_entry;
363 
/* In-memory backing store for VkPipelineCache: a hash table of
 * cache_entry pointers, guarded by `mutex`.
 */
struct radv_pipeline_cache {
   struct vk_object_base base;
   struct radv_device *device;
   mtx_t mutex;
   VkPipelineCacheCreateFlags flags;

   /* NOTE(review): total_size appears to be bytes of cached data,
    * table_size the slot count and kernel_count the entry count —
    * confirm against the cache implementation. */
   uint32_t total_size;
   uint32_t table_size;
   uint32_t kernel_count;
   struct cache_entry **hash_table;
   bool modified;

   VkAllocationCallbacks alloc;
};
378 
379 struct radv_shader_binary;
380 struct radv_shader;
381 struct radv_pipeline_shader_stack_size;
382 
383 void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
384 void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
385 bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
386 
387 bool radv_create_shaders_from_pipeline_cache(
388    struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
389    struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
390    uint32_t *num_stack_sizes, bool *found_in_application_cache);
391 
392 void radv_pipeline_cache_insert_shaders(
393    struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
394    struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
395    const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
396 
397 VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
398                              struct radv_shader_binary **binaries,
399                              struct radv_shader_binary *gs_copy_binary);
400 
/* Two-way key for depth/stencil blit pipelines/render passes, derived
 * from the image layout (GENERAL vs. anything else); see
 * radv_meta_blit_ds_to_type().
 */
enum radv_blit_ds_layout {
   RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
   RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
   RADV_BLIT_DS_LAYOUT_COUNT,
};
406 
407 static inline enum radv_blit_ds_layout
radv_meta_blit_ds_to_type(VkImageLayout layout)408 radv_meta_blit_ds_to_type(VkImageLayout layout)
409 {
410    return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
411                                               : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
412 }
413 
414 static inline VkImageLayout
radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)415 radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
416 {
417    return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
418                                                        : VK_IMAGE_LAYOUT_GENERAL;
419 }
420 
/* Two-way key for meta destination render passes/pipelines, derived
 * from the destination image layout (GENERAL vs. anything else); see
 * radv_meta_dst_layout_from_layout().
 */
enum radv_meta_dst_layout {
   RADV_META_DST_LAYOUT_GENERAL,
   RADV_META_DST_LAYOUT_OPTIMAL,
   RADV_META_DST_LAYOUT_COUNT,
};
426 
427 static inline enum radv_meta_dst_layout
radv_meta_dst_layout_from_layout(VkImageLayout layout)428 radv_meta_dst_layout_from_layout(VkImageLayout layout)
429 {
430    return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
431                                               : RADV_META_DST_LAYOUT_OPTIMAL;
432 }
433 
434 static inline VkImageLayout
radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)435 radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
436 {
437    return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
438                                                  : VK_IMAGE_LAYOUT_GENERAL;
439 }
440 
/* Pipelines, pipeline layouts, descriptor set layouts and render passes
 * used by the driver's internal "meta" operations: clears, blits,
 * buffer/image copies, resolves, HTILE/DCC/FMASK (de)compression,
 * queries, acceleration-structure builds and ETC2 decoding. Most of
 * these are created on demand under `mtx`.
 */
struct radv_meta_state {
   VkAllocationCallbacks alloc;

   struct radv_pipeline_cache cache;

   /*
    * For on-demand pipeline creation, makes sure that
    * only one thread tries to build a pipeline at the same time.
    */
   mtx_t mtx;

   /**
    * Use array element `i` for images with `2^i` samples.
    */
   struct {
      VkRenderPass render_pass[NUM_META_FS_KEYS];
      VkPipeline color_pipelines[NUM_META_FS_KEYS];
   } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];

   struct {
      VkRenderPass depthstencil_rp;
      VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];

      VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
   } ds_clear[MAX_SAMPLES_LOG2];

   VkPipelineLayout clear_color_p_layout;
   VkPipelineLayout clear_depth_p_layout;
   VkPipelineLayout clear_depth_unrestricted_p_layout;

   /* Optimized compute fast HTILE clear for stencil or depth only. */
   VkPipeline clear_htile_mask_pipeline;
   VkPipelineLayout clear_htile_mask_p_layout;
   VkDescriptorSetLayout clear_htile_mask_ds_layout;

   /* Copy VRS into HTILE. */
   VkPipeline copy_vrs_htile_pipeline;
   VkPipelineLayout copy_vrs_htile_p_layout;
   VkDescriptorSetLayout copy_vrs_htile_ds_layout;

   /* Clear DCC with comp-to-single. */
   VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
   VkPipelineLayout clear_dcc_comp_to_single_p_layout;
   VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;

   struct {
      VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];

      /** Pipeline that blits from a 1D image. */
      VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 2D image. */
      VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 3D image. */
      VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

      VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
      VkPipeline depth_only_1d_pipeline;
      VkPipeline depth_only_2d_pipeline;
      VkPipeline depth_only_3d_pipeline;

      VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
      VkPipeline stencil_only_1d_pipeline;
      VkPipeline stencil_only_2d_pipeline;
      VkPipeline stencil_only_3d_pipeline;
      VkPipelineLayout pipeline_layout;
      VkDescriptorSetLayout ds_layout;
   } blit;

   struct {
      VkPipelineLayout p_layouts[5];
      VkDescriptorSetLayout ds_layouts[5];
      VkPipeline pipelines[5][NUM_META_FS_KEYS];

      VkPipeline depth_only_pipeline[5];

      VkPipeline stencil_only_pipeline[5];
   } blit2d[MAX_SAMPLES_LOG2];

   VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
   VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
   VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];

   /* image -> buffer copy */
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } itob;
   /* buffer -> image copy */
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } btoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } btoi_r32g32b32;
   /* image -> image copy */
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } itoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } itoi_r32g32b32;
   /* compute image clear */
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } cleari;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } cleari_r32g32b32;
   struct {
      VkPipelineLayout p_layout;
      VkDescriptorSetLayout ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_copy;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline pipeline[NUM_META_FS_KEYS];
      VkRenderPass pass[NUM_META_FS_KEYS];
   } resolve;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      struct {
         VkPipeline pipeline;
         VkPipeline i_pipeline;
         VkPipeline srgb_pipeline;
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_compute;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;

      struct {
         VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
         VkPipeline pipeline[NUM_META_FS_KEYS];
      } rc[MAX_SAMPLES_LOG2];

      VkRenderPass depth_render_pass;
      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkRenderPass stencil_render_pass;
      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_fragment;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline decompress_pipeline;
      VkPipeline resummarize_pipeline;
      VkRenderPass pass;
   } depth_decomp[MAX_SAMPLES_LOG2];

   VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
   VkPipelineLayout expand_depth_stencil_compute_p_layout;
   VkPipeline expand_depth_stencil_compute_pipeline;

   struct {
      VkPipelineLayout p_layout;
      VkPipeline cmask_eliminate_pipeline;
      VkPipeline fmask_decompress_pipeline;
      VkPipeline dcc_decompress_pipeline;
      VkRenderPass pass;

      VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
      VkPipelineLayout dcc_decompress_compute_p_layout;
      VkPipeline dcc_decompress_compute_pipeline;
   } fast_clear_flush;

   struct {
      VkPipelineLayout fill_p_layout;
      VkPipelineLayout copy_p_layout;
      VkDescriptorSetLayout fill_ds_layout;
      VkDescriptorSetLayout copy_ds_layout;
      VkPipeline fill_pipeline;
      VkPipeline copy_pipeline;
   } buffer;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline occlusion_query_pipeline;
      VkPipeline pipeline_statistics_query_pipeline;
      VkPipeline tfb_query_pipeline;
      VkPipeline timestamp_query_pipeline;
   } query;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_expand;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[32];
   } dcc_retile;

   struct {
      VkPipelineLayout leaf_p_layout;
      VkPipeline leaf_pipeline;
      VkPipelineLayout internal_p_layout;
      VkPipeline internal_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;
   } accel_struct_build;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } etc_decode;
};
696 
697 #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
698 
699 struct radv_deferred_queue_submission;
700 
/* Translate a Vulkan queue family index into the driver's queue family,
 * using the mapping built on the physical device.
 */
static inline enum radv_queue_family
vk_queue_to_radv(struct radv_physical_device *phys_dev,
                 int queue_family_index)
{
   assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
   return phys_dev->vk_queue_to_radv[queue_family_index];
}
708 
709 enum ring_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
710                                          enum radv_queue_family f);
711 
/* Per-queue state backing VkQueue: the hardware context plus the
 * scratch/ring buffers and preamble command streams attached to this
 * queue.
 */
struct radv_queue {
   struct vk_queue vk;
   struct radv_device *device;
   struct radeon_winsys_ctx *hw_ctx;
   enum radeon_ctx_priority priority;

   enum radv_queue_family qf;
   /* Current sizes/feature flags for the buffers below. */
   uint32_t scratch_size_per_wave;
   uint32_t scratch_waves;
   uint32_t compute_scratch_size_per_wave;
   uint32_t compute_scratch_waves;
   uint32_t esgs_ring_size;
   uint32_t gsvs_ring_size;
   bool has_tess_rings;
   bool has_gds;
   bool has_gds_oa;
   bool has_sample_positions;

   /* Backing BOs for scratch, descriptors and the shader rings. */
   struct radeon_winsys_bo *scratch_bo;
   struct radeon_winsys_bo *descriptor_bo;
   struct radeon_winsys_bo *compute_scratch_bo;
   struct radeon_winsys_bo *esgs_ring_bo;
   struct radeon_winsys_bo *gsvs_ring_bo;
   struct radeon_winsys_bo *tess_rings_bo;
   struct radeon_winsys_bo *gds_bo;
   struct radeon_winsys_bo *gds_oa_bo;
   /* Preamble command streams — NOTE(review): presumably prepended to
    * submissions to set up the state above; confirm exact semantics in
    * the queue submission code. */
   struct radeon_cmdbuf *initial_preamble_cs;
   struct radeon_cmdbuf *initial_full_flush_preamble_cs;
   struct radeon_cmdbuf *continue_preamble_cs;
};
742 
743 #define RADV_BORDER_COLOR_COUNT       4096
744 #define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
745 
/* Per-device pool of custom border colors: a GPU buffer of
 * RADV_BORDER_COLOR_COUNT slots plus an occupation bitmap.
 */
struct radv_device_border_color_data {
   /* Slot occupation; indexed in step with colors_gpu_ptr. */
   bool used[RADV_BORDER_COLOR_COUNT];

   struct radeon_winsys_bo *bo;
   VkClearColorValue *colors_gpu_ptr;

   /* Mutex is required to guarantee vkCreateSampler thread safety
    * given that we are writing to a buffer and checking color occupation */
   mtx_t mutex;
};
756 
/* Forced fragment shading rates, used by the RADV_FORCE_VRS support
 * (see radv_device::force_vrs).
 */
enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0,
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};
763 
/* Background file-watcher thread state (fd + watch descriptor,
 * presumably inotify — confirm) used to pick up RADV_FORCE_VRS changes
 * at runtime; see radv_device::notifier.
 */
struct radv_notifier {
   int fd;
   int watch;
   bool quit; /* signals the thread to exit */
   thrd_t thread;
};
770 
/* Logical device state backing VkDevice. */
struct radv_device {
   struct vk_device vk;

   struct radv_instance *instance;
   struct radeon_winsys *ws;

   /* One hardware context per context priority level. */
   struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
   struct radv_meta_state meta_state;

   /* Queues indexed by radv queue family, then queue index. */
   struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
   int queue_count[RADV_MAX_QUEUE_FAMILIES];

   bool pbb_allowed;
   uint32_t tess_offchip_block_dw_size;
   uint32_t scratch_waves;
   uint32_t dispatch_initiator;

   uint32_t gs_table_depth;

   /* MSAA sample locations.
    * The first index is the sample index.
    * The second index is the coordinate: X, Y. */
   float sample_locations_1x[1][2];
   float sample_locations_2x[2][2];
   float sample_locations_4x[4][2];
   float sample_locations_8x[8][2];

   /* GFX7 and later */
   uint32_t gfx_init_size_dw;
   struct radeon_winsys_bo *gfx_init;

   /* Debug trace buffer; trace_id_ptr points into its CPU mapping —
    * NOTE(review): confirm layout against the trace emission code. */
   struct radeon_winsys_bo *trace_bo;
   uint32_t *trace_id_ptr;

   /* Whether to keep shader debug info, for debugging. */
   bool keep_shader_info;

   struct radv_physical_device *physical_device;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct radv_pipeline_cache *mem_cache;

   /*
    * use different counters so MSAA MRTs get consecutive surface indices,
    * even if MASK is allocated in between.
    */
   uint32_t image_mrt_offset_counter;
   uint32_t fmask_mrt_offset_counter;

   /* Shader upload allocator state, guarded by shader_arena_mutex. */
   struct list_head shader_arenas;
   unsigned shader_arena_shift;
   uint8_t shader_free_list_mask;
   struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS];
   struct list_head shader_block_obj_pool;
   mtx_t shader_arena_mutex;

   /* For detecting VM faults reported by dmesg. */
   uint64_t dmesg_timestamp;

   /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
   bool robust_buffer_access;
   bool robust_buffer_access2;

   /* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
    * on some GFX10.3 chips.
    */
   bool adjust_frag_coord_z;

   /* Whether to inline the compute dispatch size in user sgprs. */
   bool load_grid_size_from_user_sgpr;

   /* Whether the driver uses a global BO list. */
   bool use_global_bo_list;

   /* Whether attachment VRS is enabled. */
   bool attachment_vrs_enabled;

   /* Whether shader image 32-bit float atomics are enabled. */
   bool image_float32_atomics;

   /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
   int force_aniso;

   struct radv_device_border_color_data border_color_data;

   /* Thread trace. */
   struct ac_thread_trace_data thread_trace;

   /* SPM. */
   struct ac_spm_trace_data spm_trace;

   /* Performance counters. */
   struct ac_perfcounters perfcounters;

   /* Trap handler. */
   struct radv_trap_handler_shader *trap_handler_shader;
   struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
   uint32_t *tma_ptr;

   /* Overallocation. */
   bool overallocation_disallowed;
   uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
   mtx_t overallocation_mutex;

   /* RADV_FORCE_VRS. */
   struct radv_notifier notifier;
   enum radv_force_vrs force_vrs;

   /* Depth image for VRS when not bound by the app. */
   struct {
      struct radv_image *image;
      struct radv_buffer *buffer; /* HTILE */
      struct radv_device_memory *mem;
   } vrs;

   /* Cache of vertex shader prologs, guarded by vs_prologs_lock. */
   struct u_rwlock vs_prologs_lock;
   struct hash_table *vs_prologs;

   /* Prime blit sdma queue */
   struct radv_queue *private_sdma_queue;

   struct radv_shader_prolog *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
   struct radv_shader_prolog *instance_rate_vs_prologs[816];

   simple_mtx_t trace_mtx;

   /* Whether per-vertex VRS is forced. */
   bool force_vrs_enabled;
};
900 
/* Backing state for VkDeviceMemory. */
struct radv_device_memory {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   /* for dedicated allocations */
   struct radv_image *image;
   struct radv_buffer *buffer;
   uint32_t heap_index;
   uint64_t alloc_size;
   /* CPU mapping and imported host pointer — NOTE(review): presumably
    * NULL when unmapped / not host-imported; confirm. */
   void *map;
   void *user_ptr;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   struct AHardwareBuffer *android_hardware_buffer;
#endif
};
916 
917 void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
918                              struct radeon_winsys_bo *bo);
919 void radv_device_memory_finish(struct radv_device_memory *mem);
920 
/* GPU address range referenced by a dynamic descriptor. */
struct radv_descriptor_range {
   uint64_t va;
   uint32_t size;
};
925 
/* Fixed-size part of a descriptor set; the variable-length tail lives
 * in radv_descriptor_set.
 */
struct radv_descriptor_set_header {
   struct vk_object_base base;
   struct radv_descriptor_set_layout *layout;
   uint32_t size;
   /* Number of entries in radv_descriptor_set::descriptors. */
   uint32_t buffer_count;

   /* Backing BO, its GPU VA and CPU mapping. */
   struct radeon_winsys_bo *bo;
   uint64_t va;
   uint32_t *mapped_ptr;
   struct radv_descriptor_range *dynamic_descriptors;
};
937 
/* A descriptor set: fixed header plus a variable number of BO pointers
 * (header.buffer_count of them).
 */
struct radv_descriptor_set {
   struct radv_descriptor_set_header header;

   struct radeon_winsys_bo *descriptors[];
};
943 
/* Descriptor set backing push descriptors; `capacity` is the size of
 * the currently allocated storage — NOTE(review): confirm units at the
 * call sites.
 */
struct radv_push_descriptor_set {
   struct radv_descriptor_set_header set;
   uint32_t capacity;
};
948 
/* Book-keeping for one set allocated from a descriptor pool. */
struct radv_descriptor_pool_entry {
   uint32_t offset;
   uint32_t size;
   struct radv_descriptor_set *set;
};
954 
955 struct radv_descriptor_pool {
956    struct vk_object_base base;
957    struct radeon_winsys_bo *bo;
958    uint8_t *host_bo;
959    uint8_t *mapped_ptr;
960    uint64_t current_offset;
961    uint64_t size;
962 
963    uint8_t *host_memory_base;
964    uint8_t *host_memory_ptr;
965    uint8_t *host_memory_end;
966 
967    uint32_t entry_count;
968    uint32_t max_entry_count;
969    struct radv_descriptor_pool_entry entries[0];
970 };
971 
/* One write operation of a descriptor update template. */
struct radv_descriptor_update_template_entry {
   VkDescriptorType descriptor_type;

   /* The number of descriptors to update */
   uint32_t descriptor_count;

   /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
   uint32_t dst_offset;

   /* In dwords. Not valid/used for dynamic descriptors */
   uint32_t dst_stride;

   uint32_t buffer_offset;

   /* Only valid for combined image samplers and samplers */
   uint8_t has_sampler;
   uint8_t sampler_offset;

   /* In bytes */
   size_t src_offset;
   size_t src_stride;

   /* For push descriptors */
   const uint32_t *immutable_samplers;
};
997 
998 struct radv_descriptor_update_template {
999    struct vk_object_base base;
1000    uint32_t entry_count;
1001    VkPipelineBindPoint bind_point;
1002    struct radv_descriptor_update_template_entry entry[0];
1003 };
1004 
1005 void radv_descriptor_set_layout_destroy(struct radv_device *device,
1006                                         struct radv_descriptor_set_layout *set_layout);
1007 
1008 static inline void
radv_descriptor_set_layout_ref(struct radv_descriptor_set_layout * set_layout)1009 radv_descriptor_set_layout_ref(struct radv_descriptor_set_layout *set_layout)
1010 {
1011    assert(set_layout && set_layout->ref_cnt >= 1);
1012    p_atomic_inc(&set_layout->ref_cnt);
1013 }
1014 
1015 static inline void
radv_descriptor_set_layout_unref(struct radv_device * device,struct radv_descriptor_set_layout * set_layout)1016 radv_descriptor_set_layout_unref(struct radv_device *device,
1017                                  struct radv_descriptor_set_layout *set_layout)
1018 {
1019    assert(set_layout && set_layout->ref_cnt >= 1);
1020    if (p_atomic_dec_zero(&set_layout->ref_cnt))
1021       radv_descriptor_set_layout_destroy(device, set_layout);
1022 }
1023 
/* Implementation of VkBuffer. */
struct radv_buffer {
   struct vk_object_base base;
   VkDeviceSize size; /* VkBufferCreateInfo::size */

   VkBufferUsageFlags usage;
   VkBufferCreateFlags flags;

   /* Set when bound */
   struct radeon_winsys_bo *bo;
   VkDeviceSize offset; /* offset of the buffer within bo */
};
1035 
1036 void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
1037                       struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
1038 void radv_buffer_finish(struct radv_buffer *buffer);
1039 
/* One bit per dynamic state the driver tracks. Must stay in sync with
 * enum radv_cmd_dirty_bits below, which mirrors these values. */
enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   /* Mask covering every dynamic state bit above. */
   RADV_DYNAMIC_ALL = (1ull << 30) - 1,
};
1073 
/* Dirty-state tracking for command buffers (radv_cmd_state::dirty).
 * NOTE(review): values past 1ull << 31 exceed the range of int, so this relies
 * on compiler support for 64-bit enumerators (a GCC/Clang extension pre-C23). */
enum radv_cmd_dirty_bits {
   /* Keep the dynamic state dirty bits in sync with
    * enum radv_dynamic_state_bits */
   RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 30) - 1,
   /* Non-dynamic-state dirty bits start here. */
   RADV_CMD_DIRTY_PIPELINE = 1ull << 30,
   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 31,
   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32,
   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34,
};
1114 
/* Pending cache-flush / synchronization operations, accumulated in
 * radv_cmd_state::flush_bits and emitted by si_emit_cache_flush(). */
enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,
   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,
   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
   /* Framebuffer caches */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,

   /* Convenience mask: flush & invalidate all color and depth caches. */
   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
      (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
       RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
};
1151 
/* One vertex buffer binding (vkCmdBindVertexBuffers/2). */
struct radv_vertex_binding {
   struct radv_buffer *buffer;
   VkDeviceSize offset;
   VkDeviceSize size;   /* size of the bound range */
   VkDeviceSize stride; /* per-vertex stride (dynamic binding stride) */
};
1158 
/* One transform-feedback buffer binding (vkCmdBindTransformFeedbackBuffersEXT). */
struct radv_streamout_binding {
   struct radv_buffer *buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};
1164 
/* Transform-feedback (streamout) tracking for a command buffer. */
struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN) */
   bool streamout_enabled;
};
1175 
/* Bound viewports plus the derived scale/translate transforms
 * (see radv_get_viewport_xform()). */
struct radv_viewport_state {
   uint32_t count; /* number of valid entries in viewports[]/xform[] */
   VkViewport viewports[MAX_VIEWPORTS];
   struct {
      float scale[3];
      float translate[3];
   } xform[MAX_VIEWPORTS];
};
1184 
/* Bound scissor rectangles. */
struct radv_scissor_state {
   uint32_t count; /* number of valid entries in scissors[] */
   VkRect2D scissors[MAX_SCISSORS];
};
1189 
/* Discard rectangles (VK_EXT_discard_rectangles). */
struct radv_discard_rectangle_state {
   uint32_t count; /* number of valid entries in rectangles[] */
   VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};
1194 
/* Custom sample locations (VK_EXT_sample_locations). */
struct radv_sample_locations_state {
   VkSampleCountFlagBits per_pixel; /* samples per pixel */
   VkExtent2D grid_size;            /* size of the sample-location grid */
   uint32_t count;                  /* number of valid entries in locations[] */
   VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
};
1201 
/* Snapshot of all dynamic graphics state; fields mirror the corresponding
 * vkCmdSet* commands / pipeline static state. */
struct radv_dynamic_state {
   /**
    * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint64_t mask;

   struct radv_viewport_state viewport;

   struct radv_scissor_state scissor;

   float line_width;

   /* vkCmdSetDepthBias parameters. */
   struct {
      float bias;
      float clamp;
      float slope;
   } depth_bias;

   float blend_constants[4];

   /* vkCmdSetDepthBounds parameters. */
   struct {
      float min;
      float max;
   } depth_bounds;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   /* Per-face stencil operations (vkCmdSetStencilOp). */
   struct {
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } front;

      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } back;
   } stencil_op;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   struct radv_discard_rectangle_state discard_rectangle;

   struct radv_sample_locations_state sample_location;

   /* vkCmdSetLineStippleEXT parameters. */
   struct {
      uint32_t factor;
      uint16_t pattern;
   } line_stipple;

   VkCullModeFlags cull_mode;
   VkFrontFace front_face;
   unsigned primitive_topology; /* NOTE(review): appears to be a translated hw value, not VkPrimitiveTopology — confirm */

   bool depth_test_enable;
   bool depth_write_enable;
   VkCompareOp depth_compare_op;
   bool depth_bounds_test_enable;
   bool stencil_test_enable;

   /* vkCmdSetFragmentShadingRateKHR parameters. */
   struct {
      VkExtent2D size;
      VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
   } fragment_shading_rate;

   bool depth_bias_enable;
   bool primitive_restart_enable;
   bool rasterizer_discard_enable;

   unsigned logic_op; /* NOTE(review): appears to be a translated hw value, not VkLogicOp — confirm */

   uint32_t color_write_enable; /* per-attachment write-enable bits (VK_EXT_color_write_enable) */
};
1291 
1292 extern const struct radv_dynamic_state default_dynamic_state;
1293 
1294 const char *radv_get_debug_option_name(int id);
1295 
1296 const char *radv_get_perftest_option_name(int id);
1297 
1298 int radv_get_int_debug_option(const char *name, int default_value);
1299 
/* Precomputed values for the CB_* color-buffer registers of one color
 * attachment (filled in by radv_initialise_color_surface()). Field names
 * match the corresponding hardware registers. */
struct radv_color_buffer_info {
   uint64_t cb_color_base;
   uint64_t cb_color_cmask;
   uint64_t cb_color_fmask;
   uint64_t cb_dcc_base;
   uint32_t cb_color_slice;
   uint32_t cb_color_view;
   uint32_t cb_color_info;
   uint32_t cb_color_attrib;
   uint32_t cb_color_attrib2; /* GFX9 and later */
   uint32_t cb_color_attrib3; /* GFX10 and later */
   uint32_t cb_dcc_control;
   uint32_t cb_color_cmask_slice;
   uint32_t cb_color_fmask_slice;
   union {
      uint32_t cb_color_pitch; // GFX6-GFX8
      uint32_t cb_mrt_epitch;  // GFX9+
   };
};
1319 
/* Precomputed values for the DB_* depth/stencil registers of a depth
 * attachment (filled in by radv_initialise_ds_surface()). Field names match
 * the corresponding hardware registers. */
struct radv_ds_buffer_info {
   uint64_t db_z_read_base;
   uint64_t db_stencil_read_base;
   uint64_t db_z_write_base;
   uint64_t db_stencil_write_base;
   uint64_t db_htile_data_base;
   uint32_t db_depth_info;
   uint32_t db_z_info;
   uint32_t db_stencil_info;
   uint32_t db_depth_view;
   uint32_t db_depth_size;
   uint32_t db_depth_slice;
   uint32_t db_htile_surface;
   uint32_t pa_su_poly_offset_db_fmt_cntl;
   uint32_t db_z_info2;       /* GFX9 only */
   uint32_t db_stencil_info2; /* GFX9 only */
};
1337 
1338 void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1339                                    struct radv_image_view *iview);
1340 void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
1341                                 struct radv_image_view *iview);
1342 void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
1343                                  struct radv_ds_buffer_info *ds);
1344 
1345 /**
1346  * Attachment state when recording a renderpass instance.
1347  *
1348  * The clear value is valid only if there exists a pending clear.
1349  */
struct radv_attachment_state {
   VkImageAspectFlags pending_clear_aspects; /* aspects still awaiting their load-op clear */
   uint32_t cleared_views;                   /* multiview mask of views already cleared */
   VkClearValue clear_value;
   VkImageLayout current_layout;
   VkImageLayout current_stencil_layout;
   bool current_in_render_loop;
   bool disable_dcc;
   struct radv_sample_locations_state sample_location;

   /* Register state for the attachment: color (cb) or depth/stencil (ds). */
   union {
      struct radv_color_buffer_info cb;
      struct radv_ds_buffer_info ds;
   };
   struct radv_image_view *iview;
};
1366 
/* Per-bind-point descriptor bindings of a command buffer. */
struct radv_descriptor_state {
   struct radv_descriptor_set *sets[MAX_SETS]; /* currently bound sets */
   uint32_t dirty; /* bitmask of sets that need re-emission */
   uint32_t valid; /* bitmask of sets that are bound and valid */
   struct radv_push_descriptor_set push_set;
   bool push_dirty; /* push descriptors changed since last emit */
   uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS]; /* 4 dwords per dynamic buffer descriptor */
};
1375 
/* Sample locations overriding the defaults for one subpass
 * (VK_EXT_sample_locations). */
struct radv_subpass_sample_locs_state {
   uint32_t subpass_idx;
   struct radv_sample_locations_state sample_location;
};
1380 
/* Flush/invalidate reasons reported to the Radeon GPU Profiler (RGP) through
 * SQTT markers (see radv_cmd_state::sqtt_flush_bits). */
enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME = 0x10,
   RGP_FLUSH_SYNC_CP_DMA = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
   RGP_FLUSH_INVAL_ICACHE = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
   RGP_FLUSH_FLUSH_L2 = 0x200,
   RGP_FLUSH_INVAL_L2 = 0x400,
   RGP_FLUSH_FLUSH_CB = 0x800,
   RGP_FLUSH_INVAL_CB = 0x1000,
   RGP_FLUSH_FLUSH_DB = 0x2000,
   RGP_FLUSH_INVAL_DB = 0x4000,
   RGP_FLUSH_INVAL_L1 = 0x8000,
};
1399 
/* All mutable state tracked while recording a command buffer: bound objects,
 * dirty bits, pending flushes, and last-emitted values used to elide
 * redundant register writes. */
struct radv_cmd_state {
   /* Vertex descriptors */
   uint64_t vb_va;

   bool predicating;
   uint64_t dirty; /* mask of radv_cmd_dirty_bits */

   uint32_t prefetch_L2_mask;

   struct radv_pipeline *pipeline;
   struct radv_pipeline *emitted_pipeline;
   struct radv_pipeline *compute_pipeline;
   struct radv_pipeline *emitted_compute_pipeline;
   struct radv_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
   struct radv_framebuffer *framebuffer;
   struct radv_render_pass *pass;
   const struct radv_subpass *subpass;
   struct radv_dynamic_state dynamic;
   struct radv_vs_input_state dynamic_vs_input;
   struct radv_attachment_state *attachments;
   struct radv_streamout_state streamout;
   VkRect2D render_area;

   uint32_t num_subpass_sample_locs;
   struct radv_subpass_sample_locs_state *subpass_sample_locs;

   /* Index buffer */
   struct radv_buffer *index_buffer;
   uint64_t index_offset;
   uint32_t index_type;
   uint32_t max_index_count;
   uint64_t index_va;
   int32_t last_index_type;

   int32_t last_primitive_reset_en;
   uint32_t last_primitive_reset_index;
   enum radv_cmd_flush_bits flush_bits; /* flushes to emit before the next draw/dispatch */
   unsigned active_occlusion_queries;
   bool perfect_occlusion_queries_enabled;
   unsigned active_pipeline_queries;
   unsigned active_pipeline_gds_queries;
   uint32_t trace_id;
   uint32_t last_ia_multi_vgt_param;

   /* Last-emitted draw parameters, used to skip redundant emits. */
   uint32_t last_num_instances;
   uint32_t last_first_instance;
   uint32_t last_vertex_offset;
   uint32_t last_drawid;

   uint32_t last_sx_ps_downconvert;
   uint32_t last_sx_blend_opt_epsilon;
   uint32_t last_sx_blend_opt_control;

   /* Whether CP DMA is busy/idle. */
   bool dma_is_busy;

   /* Whether any images that are not L2 coherent are dirty from the CB. */
   bool rb_noncoherent_dirty;

   /* Conditional rendering info. */
   uint8_t predication_op; /* 32-bit or 64-bit predicate value */
   int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
   uint64_t predication_va;

   /* Inheritance info. */
   VkQueryPipelineStatisticFlags inherited_pipeline_statistics;

   bool context_roll_without_scissor_emitted;

   /* SQTT related state. */
   uint32_t current_event_type;
   uint32_t num_events;
   uint32_t num_layout_transitions;
   bool pending_sqtt_barrier_end;
   enum rgp_flush_bits sqtt_flush_bits;

   /* NGG culling state. */
   uint32_t last_nggc_settings;
   int8_t last_nggc_settings_sgpr_idx;
   bool last_nggc_skip;

   /* Mesh shading state. */
   bool mesh_shading;

   uint8_t cb_mip[MAX_RTS];

   /* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
   bool uses_draw_indirect_multi;

   uint32_t rt_stack_size;

   /* Vertex-shader prolog cache state. */
   struct radv_shader_prolog *emitted_vs_prolog;
   uint32_t *emitted_vs_prolog_key;
   uint32_t emitted_vs_prolog_key_hash;
   uint32_t vbo_misaligned_mask;
   uint32_t vbo_bound_mask;

   /* Whether the cmdbuffer owns the current render pass rather than the app. */
   bool own_render_pass;

   /* Per-vertex VRS state. */
   uint32_t last_vrs_rates;
   int8_t last_vrs_rates_sgpr_idx;
};
1504 
/* Implementation of VkCommandPool. */
struct radv_cmd_pool {
   struct vk_command_pool vk;
   struct list_head cmd_buffers;      /* command buffers allocated from this pool */
   struct list_head free_cmd_buffers; /* freed command buffers kept for reuse */
};
1510 
/* Upload BO used for transient per-command-buffer data
 * (see radv_cmd_buffer_upload_alloc/_data). */
struct radv_cmd_buffer_upload {
   uint8_t *map;    /* CPU mapping of upload_bo */
   unsigned offset; /* current bump-allocation offset */
   uint64_t size;   /* size of upload_bo */
   struct radeon_winsys_bo *upload_bo;
   struct list_head list; /* chain of previous upload BOs */
};
1518 
/* Command buffer lifecycle states; mirrors the Vulkan spec's command buffer
 * lifecycle (initial/recording/executable/pending/invalid). */
enum radv_cmd_buffer_status {
   RADV_CMD_BUFFER_STATUS_INVALID,
   RADV_CMD_BUFFER_STATUS_INITIAL,
   RADV_CMD_BUFFER_STATUS_RECORDING,
   RADV_CMD_BUFFER_STATUS_EXECUTABLE,
   RADV_CMD_BUFFER_STATUS_PENDING,
};
1526 
/* Implementation of VkCommandBuffer. */
struct radv_cmd_buffer {
   struct vk_command_buffer vk;

   struct radv_device *device;

   struct radv_cmd_pool *pool;
   struct list_head pool_link; /* link in pool->cmd_buffers / free_cmd_buffers */

   VkCommandBufferUsageFlags usage_flags;
   enum radv_cmd_buffer_status status;
   struct radeon_cmdbuf *cs; /* the command stream being recorded */
   struct radv_cmd_state state;
   struct radv_vertex_binding vertex_bindings[MAX_VBS];
   struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
   enum radv_queue_family qf;

   uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
   VkShaderStageFlags push_constant_stages; /* stages whose push constants are dirty */
   struct radv_descriptor_set_header meta_push_descriptors;

   struct radv_descriptor_state descriptors[MAX_BIND_POINTS];

   struct radv_cmd_buffer_upload upload;

   /* Resource requirements accumulated while recording; consumed at submit. */
   uint32_t scratch_size_per_wave_needed;
   uint32_t scratch_waves_wanted;
   uint32_t compute_scratch_size_per_wave_needed;
   uint32_t compute_scratch_waves_wanted;
   uint32_t esgs_ring_size_needed;
   uint32_t gsvs_ring_size_needed;
   bool tess_rings_needed;
   bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
   bool gds_oa_needed; /* for GFX10 streamout */
   bool sample_positions_needed;

   VkResult record_result; /* first error hit while recording, if any */

   uint64_t gfx9_fence_va;
   uint32_t gfx9_fence_idx;
   uint64_t gfx9_eop_bug_va;

   /**
    * Whether a query pool has been reset and we have to flush caches.
    */
   bool pending_reset_query;

   /**
    * Bitmask of pending active query flushes.
    */
   enum radv_cmd_flush_bits active_query_flush_bits;
};
1578 
1579 struct radv_image;
1580 struct radv_image_view;
1581 
1582 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
1583 
1584 void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
1585 void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
1586 
1587 void cik_create_gfx_config(struct radv_device *device);
1588 
1589 void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
1590                        const VkViewport *viewports, bool can_use_guardband);
1591 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
1592                                    bool indirect_draw, bool count_from_stream_output,
1593                                    uint32_t draw_vertex_count, unsigned topology,
1594                                    bool prim_restart_enable);
1595 void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
1596                                 unsigned event, unsigned event_flags, unsigned dst_sel,
1597                                 unsigned data_sel, uint64_t va, uint32_t new_fence,
1598                                 uint64_t gfx9_eop_bug_va);
1599 
1600 void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
1601                       uint32_t mask);
1602 void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
1603                             uint32_t *fence_ptr, uint64_t va, bool is_mec,
1604                             enum radv_cmd_flush_bits flush_bits,
1605                             enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
1606 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
1607 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
1608                                    unsigned pred_op, uint64_t va);
1609 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
1610                            uint64_t size);
1611 void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
1612 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
1613                             unsigned value);
1614 void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
1615 
1616 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
1617 
1618 unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
1619 uint32_t radv_hash_vs_prolog(const void *key_);
1620 bool radv_cmp_vs_prolog(const void *a_, const void *b_);
1621 
1622 bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1623                                   unsigned *out_offset, void **ptr);
1624 void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
1625                                  const struct radv_subpass *subpass);
1626 void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
1627                                      const struct radv_subpass *subpass);
1628 bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
1629                                  const void *data, unsigned *out_offset);
1630 
1631 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
1632 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
1633 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
1634 void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
1635                                            VkImageAspectFlags aspects,
1636                                            VkResolveModeFlagBits resolve_mode);
1637 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
1638 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
1639                                            VkImageAspectFlags aspects,
1640                                            VkResolveModeFlagBits resolve_mode);
1641 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
1642 unsigned radv_get_default_max_sample_dist(int log_samples);
1643 void radv_device_init_msaa(struct radv_device *device);
1644 VkResult radv_device_init_vrs_state(struct radv_device *device);
1645 
1646 void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1647                                    const struct radv_image_view *iview,
1648                                    VkClearDepthStencilValue ds_clear_value,
1649                                    VkImageAspectFlags aspects);
1650 
1651 void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
1652                                       const struct radv_image_view *iview, int cb_idx,
1653                                       uint32_t color_values[2]);
1654 
1655 bool radv_image_use_dcc_image_stores(const struct radv_device *device,
1656                                      const struct radv_image *image);
1657 bool radv_image_use_dcc_predication(const struct radv_device *device,
1658                                     const struct radv_image *image);
1659 
1660 void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1661                               const VkImageSubresourceRange *range, bool value);
1662 
1663 void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1664                               const VkImageSubresourceRange *range, bool value);
1665 enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
1666                                                VkAccessFlags2KHR src_flags,
1667                                                const struct radv_image *image);
1668 enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
1669                                                VkAccessFlags2KHR dst_flags,
1670                                                const struct radv_image *image);
1671 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1672                           struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
1673                           uint32_t value);
1674 void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
1675                       struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
1676                       uint64_t size);
1677 
1678 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
1679 bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
1680 void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1681                       struct radv_device_memory *mem);
1682 
1683 static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf * cs,unsigned sh_offset,unsigned pointer_count,bool use_32bit_pointers)1684 radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
1685                               bool use_32bit_pointers)
1686 {
1687    radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
1688    radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
1689 }
1690 
1691 static inline void
radv_emit_shader_pointer_body(struct radv_device * device,struct radeon_cmdbuf * cs,uint64_t va,bool use_32bit_pointers)1692 radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
1693                               bool use_32bit_pointers)
1694 {
1695    radeon_emit(cs, va);
1696 
1697    if (use_32bit_pointers) {
1698       assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
1699    } else {
1700       radeon_emit(cs, va >> 32);
1701    }
1702 }
1703 
1704 static inline void
radv_emit_shader_pointer(struct radv_device * device,struct radeon_cmdbuf * cs,uint32_t sh_offset,uint64_t va,bool global)1705 radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
1706                          uint64_t va, bool global)
1707 {
1708    bool use_32bit_pointers = !global;
1709 
1710    radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
1711    radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
1712 }
1713 
1714 static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)1715 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
1716 {
1717    switch (bind_point) {
1718    case VK_PIPELINE_BIND_POINT_GRAPHICS:
1719    case VK_PIPELINE_BIND_POINT_COMPUTE:
1720       return &cmd_buffer->descriptors[bind_point];
1721    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1722       return &cmd_buffer->descriptors[2];
1723    default:
1724       unreachable("Unhandled bind point");
1725    }
1726 }
1727 
1728 void
1729 radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);
1730 
1731 /*
1732  * Takes x,y,z as exact numbers of invocations, instead of blocks.
1733  *
1734  * Limitations: Can't call normal dispatch functions without binding or rebinding
1735  *              the compute pipeline.
1736  */
1737 void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
1738                              uint32_t z);
1739 
1740 void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo,
1741                             uint64_t va);
1742 
/* Implementation of VkEvent: a BO holding the event value, with a CPU mapping. */
struct radv_event {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   uint64_t *map; /* CPU mapping of bo */
};
1748 
/* Flags mixed into shader hashes so shaders compiled with incompatible
 * options never share a cache entry (see radv_get_hash_flags()). */
#define RADV_HASH_SHADER_CS_WAVE32         (1 << 1)
#define RADV_HASH_SHADER_PS_WAVE32         (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32         (1 << 3)
#define RADV_HASH_SHADER_LLVM              (1 << 4)
#define RADV_HASH_SHADER_KEEP_STATISTICS   (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING   (1 << 13)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_FORCE_EMULATE_RT      (1 << 16)
#define RADV_HASH_SHADER_SPLIT_FMA             (1 << 17)
#define RADV_HASH_SHADER_RT_WAVE64             (1 << 18)
1760 
1761 struct radv_pipeline_key;
1762 
1763 void radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
1764                        const struct radv_pipeline_layout *layout,
1765                        const struct radv_pipeline_key *key, uint32_t flags);
1766 
1767 void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
1768                           uint32_t flags);
1769 
1770 uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);
1771 
1772 bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);
1773 
1774 #define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)
1775 
1776 #define radv_foreach_stage(stage, stage_bits)                                                      \
1777    for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK);            \
1778         stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
1779 
1780 extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
1781 unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
1782 
/* Precomputed multisample state for a graphics pipeline; fields are named
 * after the AMD registers they are programmed into.
 */
struct radv_multisample_state {
   uint32_t db_eqaa;
   uint32_t pa_sc_mode_cntl_0;
   uint32_t pa_sc_mode_cntl_1;
   uint32_t pa_sc_aa_config;
   uint32_t pa_sc_aa_mask[2];
   unsigned num_samples; /* sample count this state was built for */
};
1791 
/* Precomputed variable-rate-shading state (PA_CL_VRS_CNTL register value). */
struct radv_vrs_state {
   uint32_t pa_cl_vrs_cntl;
};
1795 
/* Vertex-count parameters of a primitive topology.
 * NOTE(review): presumed from the field names — 'min' vertices for the first
 * primitive, 'incr' per additional primitive; confirm against users.
 */
struct radv_prim_vertex_count {
   uint8_t min;
   uint8_t incr;
};
1800 
/* Precomputed base value and flags used when emitting IA_MULTI_VGT_PARAM. */
struct radv_ia_multi_vgt_param_helpers {
   uint32_t base; /* base register value */
   bool partial_es_wave;
   uint8_t primgroup_size;
   bool ia_switch_on_eoi;
   bool partial_vs_wave;
};
1808 
/* Precomputed primitive-binning state (PA_SC_BINNER_CNTL_0 register value). */
struct radv_binning_state {
   uint32_t pa_sc_binner_cntl_0;
};
1812 
1813 #define SI_GS_PER_ES 128
1814 
/* Discriminator for the per-type union in struct radv_pipeline. */
enum radv_pipeline_type {
   /* Classic graphics pipeline. */
   RADV_PIPELINE_GRAPHICS,
   /* Compute pipeline (incl raytracing pipeline) */
   RADV_PIPELINE_COMPUTE,
   /* Pipeline library. This can't actually run and merely is a partial pipeline. */
   RADV_PIPELINE_LIBRARY
};
1822 
/* Opaque shader-group handle returned to the application for ray tracing
 * pipelines (see compute.rt_group_handles in struct radv_pipeline).
 */
struct radv_pipeline_group_handle {
   uint32_t handles[2];
};
1826 
/* Per-group scratch stack sizes of a ray tracing pipeline. */
struct radv_pipeline_shader_stack_size {
   uint32_t recursive_size;
   /* anyhit + intersection */
   uint32_t non_recursive_size;
};
1832 
/* Refcounted shader-arena allocation owned by a pipeline; destroyed via
 * radv_pipeline_slab_destroy() — presumably when ref_count drops to zero.
 */
struct radv_pipeline_slab {
   uint32_t ref_count;

   union radv_shader_arena_block *alloc; /* backing arena block */
};
1838 
1839 void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab);
1840 
/* Base pipeline object. The anonymous union at the end holds the per-type
 * state selected by 'type' (graphics, compute/raytracing, or library).
 */
struct radv_pipeline {
   struct vk_object_base base;
   enum radv_pipeline_type type;

   struct radv_device *device;
   struct radv_dynamic_state dynamic_state;

   /* Refcounted slab holding this pipeline's shader allocation. */
   struct radv_pipeline_slab *slab;

   bool need_indirect_descriptor_sets;
   /* One shader per stage; NULL when the stage is unused. */
   struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
   struct radv_shader *gs_copy_shader;
   VkShaderStageFlags active_stages;

   /* Precomputed command streams; ctx_cs is hashed (ctx_cs_hash) —
    * NOTE(review): presumably to skip redundant context-register re-emission,
    * confirm against the bind/emit code.
    */
   struct radeon_cmdbuf cs;
   uint32_t ctx_cs_hash;
   struct radeon_cmdbuf ctx_cs;

   uint32_t binding_stride[MAX_VBS];

   /* Vertex input layout, one entry per vertex attribute. */
   uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];

   bool use_per_attribute_vb_descs;
   bool can_use_simple_input;
   uint8_t last_vertex_attrib_bit;
   uint8_t next_vertex_stage : 8;
   uint32_t vb_desc_usage_mask;
   uint32_t vb_desc_alloc_size;

   /* Per-stage base SGPR for user data. */
   uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
   union {
      /* Valid when type == RADV_PIPELINE_GRAPHICS. */
      struct {
         struct radv_multisample_state ms;
         struct radv_binning_state binning;
         struct radv_vrs_state vrs;
         uint32_t spi_baryc_cntl;
         unsigned esgs_ring_size;
         unsigned gsvs_ring_size;
         uint32_t vtx_base_sgpr;
         struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
         uint8_t vtx_emit_num;
         bool uses_drawid;
         bool uses_baseinstance;
         bool can_use_guardband;
         uint64_t needed_dynamic_state;
         bool disable_out_of_order_rast_for_occlusion;
         unsigned tess_patch_control_points;
         unsigned pa_su_sc_mode_cntl;
         unsigned db_depth_control;
         unsigned pa_cl_clip_cntl;
         unsigned cb_color_control;
         bool uses_dynamic_stride;
         bool uses_conservative_overestimate;
         bool negative_one_to_one;

         /* Used for rbplus */
         uint32_t col_format;
         uint32_t cb_target_mask;

         /* Whether the pipeline uses NGG (GFX10+). */
         bool is_ngg;
         bool has_ngg_culling;

         /* Last pre-PS API stage */
         gl_shader_stage last_vgt_api_stage;

         /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
         bool force_vrs_per_vertex;
      } graphics;
      /* Valid when type == RADV_PIPELINE_COMPUTE (incl raytracing). */
      struct {
         struct radv_pipeline_group_handle *rt_group_handles;
         struct radv_pipeline_shader_stack_size *rt_stack_sizes;
         bool dynamic_stack_size;
         uint32_t group_count;
         bool cs_regalloc_hang_bug;
      } compute;
      /* Valid when type == RADV_PIPELINE_LIBRARY. */
      struct {
         unsigned stage_count;
         VkPipelineShaderStageCreateInfo *stages;
         unsigned group_count;
         VkRayTracingShaderGroupCreateInfoKHR *groups;
      } library;
   };

   unsigned max_waves;
   unsigned scratch_bytes_per_wave;

   /* Not NULL if graphics pipeline uses streamout. */
   struct radv_shader *streamout_shader;

   /* Unique pipeline hash identifier. */
   uint64_t pipeline_hash;

   /* Pipeline layout info. */
   uint32_t push_constant_size;
   uint32_t dynamic_offset_count;
};
1940 
1941 static inline bool
radv_pipeline_has_gs(const struct radv_pipeline * pipeline)1942 radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
1943 {
1944    return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
1945 }
1946 
1947 static inline bool
radv_pipeline_has_tess(const struct radv_pipeline * pipeline)1948 radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
1949 {
1950    return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
1951 }
1952 
1953 static inline bool
radv_pipeline_has_mesh(const struct radv_pipeline * pipeline)1954 radv_pipeline_has_mesh(const struct radv_pipeline *pipeline)
1955 {
1956    return !!pipeline->shaders[MESA_SHADER_MESH];
1957 }
1958 
1959 bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline);
1960 
1961 bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
1962 
1963 struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
1964                                                  gl_shader_stage stage, int idx);
1965 
1966 struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage);
1967 
/* RADV-internal creation options passed alongside the Vulkan create info to
 * radv_graphics_pipeline_create() (the 'extra' parameter).
 */
struct radv_graphics_pipeline_create_info {
   bool use_rectlist;
   bool db_depth_clear;
   bool db_stencil_clear;
   bool depth_compress_disable;
   bool stencil_compress_disable;
   bool resummarize_enable;
   uint32_t custom_blend_mode;
};
1977 
1978 VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
1979                                        const VkGraphicsPipelineCreateInfo *pCreateInfo,
1980                                        const struct radv_graphics_pipeline_create_info *extra,
1981                                        const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
1982 
1983 VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
1984                                       const VkComputePipelineCreateInfo *pCreateInfo,
1985                                       const VkAllocationCallbacks *pAllocator,
1986                                       const uint8_t *custom_hash,
1987                                       struct radv_pipeline_shader_stack_size *rt_stack_sizes,
1988                                       uint32_t rt_group_count, VkPipeline *pPipeline);
1989 
1990 void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
1991                            const VkAllocationCallbacks *allocator);
1992 
/* Tuned primitive-binning parameters (see radv_get_binning_settings()). */
struct radv_binning_settings {
   unsigned context_states_per_bin;    /* allowed range: [1, 6] */
   unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
   unsigned fpovs_per_batch;           /* allowed range: [0, 255], 0 = unlimited */
};
1998 
1999 struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
2000 
2001 struct vk_format_description;
2002 uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
2003                                           int first_non_void);
2004 uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
2005                                          int first_non_void);
2006 bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2007 void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
2008                                   const struct util_format_description *desc, unsigned *dfmt,
2009                                   unsigned *nfmt, bool *post_shuffle,
2010                                   enum radv_vs_input_alpha_adjust *alpha_adjust);
2011 uint32_t radv_translate_colorformat(VkFormat format);
2012 uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
2013                                         int first_non_void);
2014 uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2015 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2016 uint32_t radv_translate_dbformat(VkFormat format);
2017 uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
2018                                        int first_non_void);
2019 uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
2020                                       int first_non_void);
2021 bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
2022                                   VkClearColorValue *value);
2023 bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
2024                                             VkFormat format);
2025 bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
2026                                           VkFormat format, bool *blendable);
2027 bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2, bool *sign_reinterpret);
2028 bool radv_is_atomic_format_supported(VkFormat format);
2029 bool radv_device_supports_etc(struct radv_physical_device *physical_device);
2030 
/* Per-plane data of an image; multi-planar formats have several planes. */
struct radv_image_plane {
   VkFormat format;
   struct radeon_surf surface;
};
2035 
/* Implementation of VkImage. Per-plane surface data is stored inline at the
 * end of the allocation (see plane_count / planes[]).
 */
struct radv_image {
   struct vk_object_base base;
   VkImageType type;
   /* The original VkFormat provided by the client.  This may not match any
    * of the actual surface formats.
    */
   VkFormat vk_format;
   VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
   struct ac_surf_info info;
   VkImageTiling tiling;     /** VkImageCreateInfo::tiling */
   VkImageCreateFlags flags; /** VkImageCreateInfo::flags */

   VkDeviceSize size;
   uint32_t alignment;

   unsigned queue_family_mask;
   bool exclusive;
   bool shareable;
   bool l2_coherent;
   bool dcc_sign_reinterpret;
   bool support_comp_to_single;

   /* Set when bound */
   struct radeon_winsys_bo *bo;
   VkDeviceSize offset;
   bool tc_compatible_cmask;

   /* Offsets of the fast-clear/predication metadata, relative to 'offset'
    * within 'bo'; 0 means absent (see the radv_image_get_*_va helpers).
    */
   uint64_t clear_value_offset;
   uint64_t fce_pred_offset;
   uint64_t dcc_pred_offset;

   /*
    * Metadata for the TC-compat zrange workaround. If the 32-bit value
    * stored at this offset is UINT_MAX, the driver will emit
    * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
    * SET_CONTEXT_REG packet.
    */
   uint64_t tc_compat_zrange_offset;

   /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
   VkDeviceMemory owned_memory;

   /* Trailing per-plane data, allocated inline with the image. */
   unsigned plane_count;
   struct radv_image_plane planes[0];
};
2081 
2082 /* Whether the image has a htile  that is known consistent with the contents of
2083  * the image and is allowed to be in compressed form.
2084  *
2085  * If this is false reads that don't use the htile should be able to return
2086  * correct results.
2087  */
2088 bool radv_layout_is_htile_compressed(const struct radv_device *device,
2089                                      const struct radv_image *image, VkImageLayout layout,
2090                                      bool in_render_loop, unsigned queue_mask);
2091 
2092 bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2093                                 unsigned level, VkImageLayout layout, bool in_render_loop,
2094                                 unsigned queue_mask);
2095 
2096 bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2097                                 unsigned level, VkImageLayout layout, bool in_render_loop,
2098                                 unsigned queue_mask);
2099 
2100 bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2101                                   VkImageLayout layout, unsigned queue_mask);
2102 
2103 /**
2104  * Return whether the image has CMASK metadata for color surfaces.
2105  */
2106 static inline bool
radv_image_has_cmask(const struct radv_image * image)2107 radv_image_has_cmask(const struct radv_image *image)
2108 {
2109    return image->planes[0].surface.cmask_offset;
2110 }
2111 
2112 /**
2113  * Return whether the image has FMASK metadata for color surfaces.
2114  */
2115 static inline bool
radv_image_has_fmask(const struct radv_image * image)2116 radv_image_has_fmask(const struct radv_image *image)
2117 {
2118    return image->planes[0].surface.fmask_offset;
2119 }
2120 
2121 /**
2122  * Return whether the image has DCC metadata for color surfaces.
2123  */
2124 static inline bool
radv_image_has_dcc(const struct radv_image * image)2125 radv_image_has_dcc(const struct radv_image *image)
2126 {
2127    return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
2128           image->planes[0].surface.meta_offset;
2129 }
2130 
2131 /**
2132  * Return whether the image is TC-compatible CMASK.
2133  */
2134 static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image * image)2135 radv_image_is_tc_compat_cmask(const struct radv_image *image)
2136 {
2137    return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2138 }
2139 
2140 /**
2141  * Return whether DCC metadata is enabled for a level.
2142  */
2143 static inline bool
radv_dcc_enabled(const struct radv_image * image,unsigned level)2144 radv_dcc_enabled(const struct radv_image *image, unsigned level)
2145 {
2146    return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2147 }
2148 
2149 /**
2150  * Return whether the image has CB metadata.
2151  */
2152 static inline bool
radv_image_has_CB_metadata(const struct radv_image * image)2153 radv_image_has_CB_metadata(const struct radv_image *image)
2154 {
2155    return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image);
2156 }
2157 
2158 /**
2159  * Return whether the image has HTILE metadata for depth surfaces.
2160  */
2161 static inline bool
radv_image_has_htile(const struct radv_image * image)2162 radv_image_has_htile(const struct radv_image *image)
2163 {
2164    return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
2165           image->planes[0].surface.meta_size;
2166 }
2167 
2168 /**
2169  * Return whether the image has VRS HTILE metadata for depth surfaces
2170  */
2171 static inline bool
radv_image_has_vrs_htile(const struct radv_device * device,const struct radv_image * image)2172 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2173 {
2174    /* Any depth buffer can potentially use VRS. */
2175    return device->attachment_vrs_enabled && radv_image_has_htile(image) &&
2176           (image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2177 }
2178 
2179 /**
2180  * Return whether HTILE metadata is enabled for a level.
2181  */
2182 static inline bool
radv_htile_enabled(const struct radv_image * image,unsigned level)2183 radv_htile_enabled(const struct radv_image *image, unsigned level)
2184 {
2185    return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2186 }
2187 
2188 /**
2189  * Return whether the image is TC-compatible HTILE.
2190  */
2191 static inline bool
radv_image_is_tc_compat_htile(const struct radv_image * image)2192 radv_image_is_tc_compat_htile(const struct radv_image *image)
2193 {
2194    return radv_image_has_htile(image) &&
2195           (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2196 }
2197 
2198 /**
2199  * Return whether the entire HTILE buffer can be used for depth in order to
2200  * improve HiZ Z-Range precision.
2201  */
static inline bool
radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
{
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* NOTE(review): VRS HTILE also keeps the stencil part enabled —
       * presumably because VRS rates live in the stencil bits of HTILE
       * (cf. the encoding in radv_get_htile_initial_value()); confirm.
       */
      return !vk_format_has_stencil(image->vk_format) && !radv_image_has_vrs_htile(device, image);
   } else {
      /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
       * the TC-compat ZRANGE issue even if no stencil is used.
       */
      return !vk_format_has_stencil(image->vk_format) && !radv_image_is_tc_compat_htile(image);
   }
}
2214 
static inline bool
radv_image_has_clear_value(const struct radv_image *image)
{
   /* A zero offset means no fast-clear value metadata was allocated. */
   return image->clear_value_offset != 0;
}
2220 
2221 static inline uint64_t
radv_image_get_fast_clear_va(const struct radv_image * image,uint32_t base_level)2222 radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2223 {
2224    assert(radv_image_has_clear_value(image));
2225 
2226    uint64_t va = radv_buffer_get_va(image->bo);
2227    va += image->offset + image->clear_value_offset + base_level * 8;
2228    return va;
2229 }
2230 
2231 static inline uint64_t
radv_image_get_fce_pred_va(const struct radv_image * image,uint32_t base_level)2232 radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2233 {
2234    assert(image->fce_pred_offset != 0);
2235 
2236    uint64_t va = radv_buffer_get_va(image->bo);
2237    va += image->offset + image->fce_pred_offset + base_level * 8;
2238    return va;
2239 }
2240 
2241 static inline uint64_t
radv_image_get_dcc_pred_va(const struct radv_image * image,uint32_t base_level)2242 radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2243 {
2244    assert(image->dcc_pred_offset != 0);
2245 
2246    uint64_t va = radv_buffer_get_va(image->bo);
2247    va += image->offset + image->dcc_pred_offset + base_level * 8;
2248    return va;
2249 }
2250 
2251 static inline uint64_t
radv_get_tc_compat_zrange_va(const struct radv_image * image,uint32_t base_level)2252 radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2253 {
2254    assert(image->tc_compat_zrange_offset != 0);
2255 
2256    uint64_t va = radv_buffer_get_va(image->bo);
2257    va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
2258    return va;
2259 }
2260 
2261 static inline uint64_t
radv_get_ds_clear_value_va(const struct radv_image * image,uint32_t base_level)2262 radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2263 {
2264    assert(radv_image_has_clear_value(image));
2265 
2266    uint64_t va = radv_buffer_get_va(image->bo);
2267    va += image->offset + image->clear_value_offset + base_level * 8;
2268    return va;
2269 }
2270 
2271 static inline uint32_t
radv_get_htile_initial_value(const struct radv_device * device,const struct radv_image * image)2272 radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
2273 {
2274    uint32_t initial_value;
2275 
2276    if (radv_image_tile_stencil_disabled(device, image)) {
2277       /* Z only (no stencil):
2278        *
2279        * |31     18|17      4|3     0|
2280        * +---------+---------+-------+
2281        * |  Max Z  |  Min Z  | ZMask |
2282        */
2283       initial_value = 0xfffc000f;
2284    } else {
2285       /* Z and stencil:
2286        *
2287        * |31       12|11 10|9    8|7   6|5   4|3     0|
2288        * +-----------+-----+------+-----+-----+-------+
2289        * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
2290        *
2291        * SR0/SR1 contains the stencil test results. Initializing
2292        * SR0/SR1 to 0x3 means the stencil test result is unknown.
2293        *
2294        * Z, stencil and 4 bit VRS encoding:
2295        * |31       12|11        10|9    8|7          6|5   4|3     0|
2296        * +-----------+------------+------+------------+-----+-------+
2297        * |  Z Range  | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
2298        */
2299       if (radv_image_has_vrs_htile(device, image)) {
2300          /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
2301          initial_value = 0xfffff33f;
2302       } else {
2303          initial_value = 0xfffff3ff;
2304       }
2305    }
2306 
2307    return initial_value;
2308 }
2309 
2310 static inline bool
radv_image_get_iterate256(struct radv_device * device,struct radv_image * image)2311 radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
2312 {
2313    /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2314    return device->physical_device->rad_info.chip_class >= GFX10 &&
2315           (image->usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
2316                            VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2317           radv_image_is_tc_compat_htile(image) &&
2318           image->info.samples > 1;
2319 }
2320 
2321 unsigned radv_image_queue_family_mask(const struct radv_image *image,
2322                                       enum radv_queue_family family,
2323                                       enum radv_queue_family queue_family);
2324 
2325 static inline uint32_t
radv_get_layerCount(const struct radv_image * image,const VkImageSubresourceRange * range)2326 radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2327 {
2328    return range->layerCount == VK_REMAINING_ARRAY_LAYERS
2329              ? image->info.array_size - range->baseArrayLayer
2330              : range->layerCount;
2331 }
2332 
2333 static inline uint32_t
radv_get_levelCount(const struct radv_image * image,const VkImageSubresourceRange * range)2334 radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2335 {
2336    return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
2337                                                        : range->levelCount;
2338 }
2339 
2340 bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
2341 
2342 struct radeon_bo_metadata;
2343 void radv_init_metadata(struct radv_device *device, struct radv_image *image,
2344                         struct radeon_bo_metadata *metadata);
2345 
2346 void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
2347                                        uint64_t offset, uint32_t stride);
2348 
/* Hardware image descriptor words. The two views alias the same storage:
 * single-plane views use plane0 + fmask descriptors, multi-planar views use
 * one 8-dword descriptor per plane (up to 3).
 */
union radv_descriptor {
   struct {
      uint32_t plane0_descriptor[8];
      uint32_t fmask_descriptor[8];
   };
   struct {
      uint32_t plane_descriptors[3][8];
   };
};
2358 
/* Implementation of VkImageView: the selected sub-range of an image plus
 * prebuilt hardware descriptors for sampled and storage access.
 */
struct radv_image_view {
   struct vk_object_base base;
   struct radv_image *image; /**< VkImageViewCreateInfo::image */

   VkImageViewType type;
   VkImageAspectFlags aspect_mask;
   VkFormat vk_format;
   unsigned plane_id;
   /* Selected layer/mip sub-range of the image. */
   uint32_t base_layer;
   uint32_t layer_count;
   uint32_t base_mip;
   uint32_t level_count;
   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

   /* Whether the image iview supports fast clear. */
   bool support_fast_clear;

   union radv_descriptor descriptor;

   /* Descriptor for use as a storage image as opposed to a sampled image.
    * This has a few differences for cube maps (e.g. type).
    */
   union radv_descriptor storage_descriptor;
};
2383 
/* Wrapper around VkImageCreateInfo with RADV/WSI-specific creation options. */
struct radv_image_create_info {
   const VkImageCreateInfo *vk_info;
   bool scanout;
   bool no_metadata_planes;
   bool prime_blit_src;
   const struct radeon_bo_metadata *bo_metadata; /* optional imported metadata */
};
2391 
2392 VkResult
2393 radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
2394                          const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
2395                          struct radv_image *image);
2396 
2397 VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
2398                            const VkAllocationCallbacks *alloc, VkImage *pImage);
2399 
2400 bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
2401                                      VkFormat format, VkImageCreateFlags flags,
2402                                      bool *sign_reinterpret);
2403 
2404 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
2405 
2406 VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
2407                                  const VkNativeBufferANDROID *gralloc_info,
2408                                  const VkAllocationCallbacks *alloc, VkImage *out_image_h);
2409 uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
2410                                       const VkImageUsageFlags vk_usage);
2411 VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2412                                 unsigned priority,
2413                                 const VkImportAndroidHardwareBufferInfoANDROID *info);
2414 VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2415                                 unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
2416 
2417 VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
2418 
2419 bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2420 
/* RADV-internal options to force compression on/off for an image view. */
struct radv_image_view_extra_create_info {
   bool disable_compression;
   bool enable_compression;
};
2425 
2426 void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
2427                           const VkImageViewCreateInfo *pCreateInfo,
2428                           const struct radv_image_view_extra_create_info *extra_create_info);
2429 void radv_image_view_finish(struct radv_image_view *iview);
2430 
2431 VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2432 
/* YCbCr conversion parameters; hashed as part of the descriptor set layout
 * (see radv_sampler_ycbcr_conversion).
 */
struct radv_sampler_ycbcr_conversion_state {
   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
};
2441 
/* Implementation of VkSamplerYcbcrConversion. */
struct radv_sampler_ycbcr_conversion {
   struct vk_object_base base;
   /* The state is hashed for the descriptor set layout. */
   struct radv_sampler_ycbcr_conversion_state state;
};
2447 
/* Implementation of VkBufferView: a prebuilt 4-dword texel-buffer descriptor. */
struct radv_buffer_view {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   VkFormat vk_format;
   uint64_t range; /**< VkBufferViewCreateInfo::range */
   uint32_t state[4]; /* hardware descriptor words */
};
2455 void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2456                            const VkBufferViewCreateInfo *pCreateInfo);
2457 void radv_buffer_view_finish(struct radv_buffer_view *view);
2458 
2459 static inline struct VkExtent3D
radv_sanitize_image_extent(const VkImageType imageType,const struct VkExtent3D imageExtent)2460 radv_sanitize_image_extent(const VkImageType imageType, const struct VkExtent3D imageExtent)
2461 {
2462    switch (imageType) {
2463    case VK_IMAGE_TYPE_1D:
2464       return (VkExtent3D){imageExtent.width, 1, 1};
2465    case VK_IMAGE_TYPE_2D:
2466       return (VkExtent3D){imageExtent.width, imageExtent.height, 1};
2467    case VK_IMAGE_TYPE_3D:
2468       return imageExtent;
2469    default:
2470       unreachable("invalid image type");
2471    }
2472 }
2473 
2474 static inline struct VkOffset3D
radv_sanitize_image_offset(const VkImageType imageType,const struct VkOffset3D imageOffset)2475 radv_sanitize_image_offset(const VkImageType imageType, const struct VkOffset3D imageOffset)
2476 {
2477    switch (imageType) {
2478    case VK_IMAGE_TYPE_1D:
2479       return (VkOffset3D){imageOffset.x, 0, 0};
2480    case VK_IMAGE_TYPE_2D:
2481       return (VkOffset3D){imageOffset.x, imageOffset.y, 0};
2482    case VK_IMAGE_TYPE_3D:
2483       return imageOffset;
2484    default:
2485       unreachable("invalid image type");
2486    }
2487 }
2488 
2489 static inline bool
radv_image_extent_compare(const struct radv_image * image,const VkExtent3D * extent)2490 radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
2491 {
2492    if (extent->width != image->info.width || extent->height != image->info.height ||
2493        extent->depth != image->info.depth)
2494       return false;
2495    return true;
2496 }
2497 
/* Implementation of VkSampler: a prebuilt 4-dword sampler descriptor plus
 * optional YCbCr conversion and custom border color state.
 */
struct radv_sampler {
   struct vk_object_base base;
   uint32_t state[4]; /* hardware sampler descriptor words */
   struct radv_sampler_ycbcr_conversion *ycbcr_sampler; /* NULL when unused — presumed */
   uint32_t border_color_slot;
};
2504 
/* Implementation of VkFramebuffer; attachment views are stored inline. */
struct radv_framebuffer {
   struct vk_object_base base;
   uint32_t width;
   uint32_t height;
   uint32_t layers;


   /* Trailing inline array of attachment views. */
   uint32_t attachment_count;
   struct radv_image_view *attachments[0];
};
2515 
/* Synchronization scope of a subpass dependency, flushed via
 * radv_emit_subpass_barrier().
 */
struct radv_subpass_barrier {
   VkPipelineStageFlags2KHR src_stage_mask;
   VkAccessFlags2KHR src_access_mask;
   VkAccessFlags2KHR dst_access_mask;
};
2521 
2522 void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
2523                                const struct radv_subpass_barrier *barrier);
2524 
/* Reference to a render pass attachment within a subpass. */
struct radv_subpass_attachment {
   uint32_t attachment; /* index into the render pass attachments — presumed; may be VK_ATTACHMENT_UNUSED */
   VkImageLayout layout;
   VkImageLayout stencil_layout;
   bool in_render_loop;
};
2531 
/* Per-subpass state of a render pass. */
struct radv_subpass {
   /* All attachment references of this subpass; the typed pointers below
    * presumably point into this array — confirm at pass creation.
    */
   uint32_t attachment_count;
   struct radv_subpass_attachment *attachments;

   uint32_t input_count;
   uint32_t color_count;
   struct radv_subpass_attachment *input_attachments;
   struct radv_subpass_attachment *color_attachments;
   struct radv_subpass_attachment *resolve_attachments;
   struct radv_subpass_attachment *depth_stencil_attachment;
   struct radv_subpass_attachment *ds_resolve_attachment;
   struct radv_subpass_attachment *vrs_attachment;
   VkResolveModeFlagBits depth_resolve_mode;
   VkResolveModeFlagBits stencil_resolve_mode;

   /** Subpass has at least one color resolve attachment */
   bool has_color_resolve;

   /* Barrier emitted before the subpass starts. */
   struct radv_subpass_barrier start_barrier;

   uint32_t view_mask;

   VkSampleCountFlagBits color_sample_count;
   VkSampleCountFlagBits depth_sample_count;
   VkSampleCountFlagBits max_sample_count;

   /* Whether the subpass has ingoing/outgoing external dependencies. */
   bool has_ingoing_dep;
   bool has_outgoing_dep;
};
2562 
2563 uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
2564 
/* Static description of one render pass attachment (from
 * VkAttachmentDescription). */
struct radv_render_pass_attachment {
   VkFormat format;
   uint32_t samples;
   VkAttachmentLoadOp load_op;
   VkAttachmentLoadOp stencil_load_op;
   VkImageLayout initial_layout;
   VkImageLayout final_layout;
   /* Separate stencil layouts (separate depth/stencil layouts feature). */
   VkImageLayout stencil_initial_layout;
   VkImageLayout stencil_final_layout;

   /* The subpass id in which the attachment will be used first/last. */
   uint32_t first_subpass_idx;
   uint32_t last_subpass_idx;
};
2579 
2580 struct radv_render_pass {
2581    struct vk_object_base base;
2582    uint32_t attachment_count;
2583    uint32_t subpass_count;
2584    struct radv_subpass_attachment *subpass_attachments;
2585    struct radv_render_pass_attachment *attachments;
2586    struct radv_subpass_barrier end_barrier;
2587    struct radv_subpass subpasses[0];
2588 };
2589 
/* radv_meta.c: create/destroy the driver-internal meta objects. */
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);

/* Vulkan query pool, backed by a single winsys BO. */
struct radv_query_pool {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   /* NOTE(review): presumably bytes per query slot — confirm in radv_query.c. */
   uint32_t stride;
   uint32_t availability_offset;
   uint64_t size;
   /* CPU mapping of the pool BO. */
   char *ptr;
   VkQueryType type;
   uint32_t pipeline_stats_mask;
};

/* Submit a driver-internal command stream on the given queue. */
bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);

int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
                    const VkDeviceQueueCreateInfo *create_info,
                    const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority);

/* Bind descriptor set 'set' at index 'idx' for the given bind point. */
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
                             struct radv_descriptor_set *set, unsigned idx);

/* Apply descriptor writes/copies from the command buffer path. */
void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
                                     VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
                                     const VkWriteDescriptorSet *pDescriptorWrites,
                                     uint32_t descriptorCopyCount,
                                     const VkCopyDescriptorSet *pDescriptorCopies);

void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
                                                  struct radv_cmd_buffer *cmd_buffer,
                                                  struct radv_descriptor_set *set,
                                                  VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                                  const void *pData);

/* Push-descriptor helper used by the meta operations. */
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
                                   VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
                                   uint32_t set, uint32_t descriptorWriteCount,
                                   const VkWriteDescriptorSet *pDescriptorWrites);

/* Initialize DCC/FMASK metadata for an image subresource range. */
uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                       const VkImageSubresourceRange *range, uint32_t value);

uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                         const VkImageSubresourceRange *range);
2635 
/* radv_nir_to_llvm.c */
struct radv_shader_args;
struct radv_nir_compiler_options;
struct radv_shader_info;

/* Compile one or more (merged) NIR shaders with the LLVM backend. */
void llvm_compile_shader(const struct radv_nir_compiler_options *options,
                         const struct radv_shader_info *info, unsigned shader_count,
                         struct nir_shader *const *shaders, struct radv_shader_binary **binary,
                         const struct radv_shader_args *args);

/* radv_shader_info.h */
struct radv_shader_info;

/* Gather per-shader information from NIR needed by the backends. */
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
                               const struct radv_pipeline_layout *layout,
                               const struct radv_pipeline_key *pipeline_key,
                               struct radv_shader_info *info);

void radv_nir_shader_info_init(struct radv_shader_info *info);

/* Thread trace (SQTT) capture, used for RGP profiling. */
bool radv_thread_trace_init(struct radv_device *device);
void radv_thread_trace_finish(struct radv_device *device);
bool radv_begin_thread_trace(struct radv_queue *queue);
bool radv_end_thread_trace(struct radv_queue *queue);
bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
void radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
                                     const void *data, uint32_t num_dwords);
bool radv_is_instruction_timing_enabled(void);

/* Copy between an image and a buffer on the SDMA (transfer) engine. */
bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                          struct radv_buffer *buffer, const VkBufferImageCopy2KHR *region);
2667 
/* radv_sqtt_layer_.c */
/* Layout-transition flags reported in RGP barrier markers. */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8;
      };
      /* All transition bits viewed as a single mask. */
      uint16_t all;
   } layout_transitions;
};
2685 
/**
 * Value for the reason field of an RGP barrier start marker originating from
 * the Vulkan client (does not include PAL-defined values). (Table 15)
 */
enum rgp_barrier_reason {
   /* The reason for the barrier could not be determined. */
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External app-generated barrier reasons, i.e. API synchronization
    * commands Range of valid values: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal barrier reasons, i.e. implicit synchronization inserted by
    * the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
2709 
/* SQTT/RGP instrumentation hooks: describe recorded commands and events so
 * the SQTT layer can emit the matching RGP markers. */
void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
                                 enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
                                     const struct radv_barrier_data *barrier);

/* Current time — NOTE(review): presumably nanoseconds on a monotonic clock;
 * confirm against the definition. */
uint64_t radv_get_current_time(void);
2727 
2728 static inline uint32_t
si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)2729 si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)
2730 {
2731    switch (gl_prim) {
2732    case SHADER_PRIM_POINTS:
2733       return 1;
2734    case SHADER_PRIM_LINES:
2735    case SHADER_PRIM_LINE_STRIP:
2736       return 2;
2737    case SHADER_PRIM_TRIANGLES:
2738    case SHADER_PRIM_TRIANGLE_STRIP:
2739       return 3;
2740    case SHADER_PRIM_LINES_ADJACENCY:
2741       return 4;
2742    case SHADER_PRIM_TRIANGLES_ADJACENCY:
2743       return 6;
2744    case SHADER_PRIM_QUADS:
2745       return V_028A6C_TRISTRIP;
2746    default:
2747       assert(0);
2748       return 0;
2749    }
2750 }
2751 
2752 static inline uint32_t
si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)2753 si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
2754 {
2755    switch (topology) {
2756    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2757    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
2758       return V_028A6C_POINTLIST;
2759    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2760    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2761    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2762    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2763       return V_028A6C_LINESTRIP;
2764    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2765    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2766    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2767    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2768    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2769       return V_028A6C_TRISTRIP;
2770    default:
2771       assert(0);
2772       return 0;
2773    }
2774 }
2775 
/* Driver-internal options passed alongside VkRenderPassBeginInfo. */
struct radv_extra_render_pass_begin_info {
   /* NOTE(review): presumably forces DCC off for the pass's attachments
    * (used by meta paths) — confirm at the callers. */
   bool disable_dcc;
};

/* Begin/end a render pass instance on the command buffer; extra_info may be
 * NULL for the regular API path. */
void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
                                       const VkRenderPassBeginInfo *pRenderPassBegin,
                                       const struct radv_extra_render_pass_begin_info *extra_info);
void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer);
2784 
2785 static inline uint32_t
si_translate_prim(unsigned topology)2786 si_translate_prim(unsigned topology)
2787 {
2788    switch (topology) {
2789    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2790       return V_008958_DI_PT_POINTLIST;
2791    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2792       return V_008958_DI_PT_LINELIST;
2793    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2794       return V_008958_DI_PT_LINESTRIP;
2795    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2796       return V_008958_DI_PT_TRILIST;
2797    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2798       return V_008958_DI_PT_TRISTRIP;
2799    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2800       return V_008958_DI_PT_TRIFAN;
2801    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2802       return V_008958_DI_PT_LINELIST_ADJ;
2803    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2804       return V_008958_DI_PT_LINESTRIP_ADJ;
2805    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2806       return V_008958_DI_PT_TRILIST_ADJ;
2807    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2808       return V_008958_DI_PT_TRISTRIP_ADJ;
2809    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
2810       return V_008958_DI_PT_PATCH;
2811    default:
2812       assert(0);
2813       return 0;
2814    }
2815 }
2816 
2817 static inline uint32_t
si_translate_stencil_op(enum VkStencilOp op)2818 si_translate_stencil_op(enum VkStencilOp op)
2819 {
2820    switch (op) {
2821    case VK_STENCIL_OP_KEEP:
2822       return V_02842C_STENCIL_KEEP;
2823    case VK_STENCIL_OP_ZERO:
2824       return V_02842C_STENCIL_ZERO;
2825    case VK_STENCIL_OP_REPLACE:
2826       return V_02842C_STENCIL_REPLACE_TEST;
2827    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
2828       return V_02842C_STENCIL_ADD_CLAMP;
2829    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
2830       return V_02842C_STENCIL_SUB_CLAMP;
2831    case VK_STENCIL_OP_INVERT:
2832       return V_02842C_STENCIL_INVERT;
2833    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
2834       return V_02842C_STENCIL_ADD_WRAP;
2835    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
2836       return V_02842C_STENCIL_SUB_WRAP;
2837    default:
2838       return 0;
2839    }
2840 }
2841 
2842 static inline uint32_t
si_translate_blend_logic_op(VkLogicOp op)2843 si_translate_blend_logic_op(VkLogicOp op)
2844 {
2845    switch (op) {
2846    case VK_LOGIC_OP_CLEAR:
2847       return V_028808_ROP3_CLEAR;
2848    case VK_LOGIC_OP_AND:
2849       return V_028808_ROP3_AND;
2850    case VK_LOGIC_OP_AND_REVERSE:
2851       return V_028808_ROP3_AND_REVERSE;
2852    case VK_LOGIC_OP_COPY:
2853       return V_028808_ROP3_COPY;
2854    case VK_LOGIC_OP_AND_INVERTED:
2855       return V_028808_ROP3_AND_INVERTED;
2856    case VK_LOGIC_OP_NO_OP:
2857       return V_028808_ROP3_NO_OP;
2858    case VK_LOGIC_OP_XOR:
2859       return V_028808_ROP3_XOR;
2860    case VK_LOGIC_OP_OR:
2861       return V_028808_ROP3_OR;
2862    case VK_LOGIC_OP_NOR:
2863       return V_028808_ROP3_NOR;
2864    case VK_LOGIC_OP_EQUIVALENT:
2865       return V_028808_ROP3_EQUIVALENT;
2866    case VK_LOGIC_OP_INVERT:
2867       return V_028808_ROP3_INVERT;
2868    case VK_LOGIC_OP_OR_REVERSE:
2869       return V_028808_ROP3_OR_REVERSE;
2870    case VK_LOGIC_OP_COPY_INVERTED:
2871       return V_028808_ROP3_COPY_INVERTED;
2872    case VK_LOGIC_OP_OR_INVERTED:
2873       return V_028808_ROP3_OR_INVERTED;
2874    case VK_LOGIC_OP_NAND:
2875       return V_028808_ROP3_NAND;
2876    case VK_LOGIC_OP_SET:
2877       return V_028808_ROP3_SET;
2878    default:
2879       unreachable("Unhandled logic op");
2880    }
2881 }
2882 
/*
 * Queue helper to get the hardware ring a queue submits to.
 * Placed here (rather than a .c file) because it needs both the queue and
 * device struct definitions.
 */
static inline enum ring_type
radv_queue_ring(struct radv_queue *queue)
{
   /* Map the queue family (queue->qf) to its ring on this physical device. */
   return radv_queue_family_to_ring(queue->device->physical_device, queue->qf);
}
2892 
/**
 * Helper used for debugging compiler issues by enabling/disabling LLVM for a
 * specific shader stage (developers only).
 */
static inline bool
radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
{
   /* Currently a per-device decision only; 'stage' is kept in the signature
    * so callers don't change when per-stage overrides are (re)introduced. */
   return device->physical_device->use_llvm;
}
2902 
/* Acceleration structure (VK_KHR_acceleration_structure), stored inside a
 * buffer BO at the given offset. */
struct radv_acceleration_structure {
   struct vk_object_base base;

   struct radeon_winsys_bo *bo;
   /* Byte offset of the acceleration structure within 'bo'. */
   uint64_t mem_offset;
   uint64_t size;
};
2910 
2911 static inline uint64_t
radv_accel_struct_get_va(const struct radv_acceleration_structure * accel)2912 radv_accel_struct_get_va(const struct radv_acceleration_structure *accel)
2913 {
2914    return radv_buffer_get_va(accel->bo) + accel->mem_offset;
2915 }
2916 
/* radv_perfcounter.c: hardware performance-counter programming. */
void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs);
void radv_perfcounter_emit_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
void radv_perfcounter_emit_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);

/* radv_spm.c: streaming performance monitor setup. */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);

/* Convenience wrapper around the common Vulkan handle-cast helper. */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
   VK_FROM_HANDLE(__radv_type, __name, __handle)
2930 
2931 VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
2932                        VK_OBJECT_TYPE_COMMAND_BUFFER)
2933 VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2934 VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
2935 VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
2936                        VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2937 VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2938 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base,
2939                                VkAccelerationStructureKHR,
2940                                VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
2941 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool,
2942                                VK_OBJECT_TYPE_COMMAND_POOL)
2943 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
2944 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
2945                                VK_OBJECT_TYPE_BUFFER_VIEW)
2946 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
2947                                VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2948 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
2949                                VK_OBJECT_TYPE_DESCRIPTOR_SET)
2950 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, base,
2951                                VkDescriptorSetLayout,
2952                                VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2953 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
2954                                VkDescriptorUpdateTemplate,
2955                                VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
2956 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
2957                                VK_OBJECT_TYPE_DEVICE_MEMORY)
2958 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2959 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_framebuffer, base, VkFramebuffer,
2960                                VK_OBJECT_TYPE_FRAMEBUFFER)
2961 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
2962 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, base, VkImageView,
2963                                VK_OBJECT_TYPE_IMAGE_VIEW);
2964 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
2965                                VK_OBJECT_TYPE_PIPELINE_CACHE)
2966 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
2967                                VK_OBJECT_TYPE_PIPELINE)
2968 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
2969                                VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2970 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
2971                                VK_OBJECT_TYPE_QUERY_POOL)
2972 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass,
2973                                VK_OBJECT_TYPE_RENDER_PASS)
2974 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
2975                                VK_OBJECT_TYPE_SAMPLER)
2976 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base,
2977                                VkSamplerYcbcrConversion,
2978                                VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
2979 
2980 #ifdef __cplusplus
2981 }
2982 #endif
2983 
2984 #endif /* RADV_PRIVATE_H */
2985