1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #ifndef RADV_PRIVATE_H
29 #define RADV_PRIVATE_H
30
31 #include <assert.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #ifdef HAVE_VALGRIND
38 #include <memcheck.h>
39 #include <valgrind.h>
40 #define VG(x) x
41 #else
42 #define VG(x) ((void)0)
43 #endif
44
45 #include "c11/threads.h"
46 #ifndef _WIN32
47 #include <amdgpu.h>
48 #include <xf86drm.h>
49 #endif
50 #include "compiler/shader_enums.h"
51 #include "util/bitscan.h"
52 #include "util/cnd_monotonic.h"
53 #include "util/list.h"
54 #include "util/macros.h"
55 #include "util/rwlock.h"
56 #include "util/xmlconfig.h"
57 #include "vk_alloc.h"
58 #include "vk_command_buffer.h"
59 #include "vk_command_pool.h"
60 #include "vk_debug_report.h"
61 #include "vk_device.h"
62 #include "vk_format.h"
63 #include "vk_instance.h"
64 #include "vk_log.h"
65 #include "vk_physical_device.h"
66 #include "vk_shader_module.h"
67 #include "vk_queue.h"
68 #include "vk_util.h"
69 #include "vk_image.h"
70
71 #include "ac_binary.h"
72 #include "ac_gpu_info.h"
73 #include "ac_shader_util.h"
74 #include "ac_spm.h"
75 #include "ac_sqtt.h"
76 #include "ac_surface.h"
77 #include "radv_constants.h"
78 #include "radv_descriptor_set.h"
79 #include "radv_radeon_winsys.h"
80 #include "radv_shader.h"
81 #include "sid.h"
82
83 /* Pre-declarations needed for WSI entrypoints */
84 struct wl_surface;
85 struct wl_display;
86 typedef struct xcb_connection_t xcb_connection_t;
87 typedef uint32_t xcb_visualid_t;
88 typedef uint32_t xcb_window_t;
89
90 #include <vulkan/vk_android_native_buffer.h>
91 #include <vulkan/vk_icd.h>
92 #include <vulkan/vulkan.h>
93 #include <vulkan/vulkan_android.h>
94
95 #include "radv_entrypoints.h"
96
97 #include "wsi_common.h"
98
99 #ifdef __cplusplus
100 extern "C"
101 {
102 #endif
103
104 /* Helper to determine if we should compile
105 * any of the Android AHB support.
106 *
107 * To actually enable the ext we also need
108 * the necessary kernel support.
109 */
110 #if defined(ANDROID) && ANDROID_API_LEVEL >= 26
111 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
112 #include <vndk/hardware_buffer.h>
113 #else
114 #define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
115 #endif
116
117 #ifdef _WIN32
118 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
119 #else
120 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
121 #endif
122
123 #ifdef _WIN32
124 #define radv_printflike(a, b)
125 #else
126 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
127 #endif
128
/** Round \p v up to the next multiple of \p a; \p a must be a power of two. */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   const uint32_t mask = a - 1;
   return (v + mask) & ~mask;
}
135
/**
 * Round \p v up to the next multiple of \p a, where \p a does not need to
 * be a power of two. Agrees with align_u32() for power-of-two alignments.
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
   /* The sibling align helpers assert their alignment; without this,
    * a == 0 is a silent division by zero. */
   assert(a != 0);
   return (v + a - 1) / a * a;
}
141
/** 64-bit variant of align_u32(): round \p v up to power-of-two \p a. */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   const uint64_t mask = a - 1;
   return (v + mask) & ~mask;
}
148
/** Signed variant: round \p v up (toward +inf) to power-of-two \p a. */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a != 0 && a == (a & -a));
   const int32_t mask = a - 1;
   return (v + mask) & ~mask;
}
155
/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   const uintmax_t mask = a - 1;
   return (n & mask) == 0;
}
163
/** Ceiling division: how many \p a -sized chunks are needed to hold \p v. */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
   uint32_t biased = v + a - 1;
   return biased / a;
}
169
/** 64-bit ceiling division: chunks of size \p a needed to hold \p v. */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
   uint64_t biased = v + a - 1;
   return biased / a;
}
175
/**
 * Compute a mip-level dimension: halve \p n once per level, clamping so a
 * nonzero extent never minifies below 1. A zero extent stays zero.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
   if (n == 0)
      return 0;
   const uint32_t shifted = n >> levels;
   return shifted > 0 ? shifted : 1;
}
/** Clamp \p f into [\p min, \p max]; NaN passes through unchanged. */
static inline float
radv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   if (f < min)
      return min;
   return f > max ? max : f;
}
196
/**
 * Clear the bits of \p clear_mask from *\p inout_mask.
 * Returns true iff any of those bits were previously set.
 */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   const bool was_set = (*inout_mask & clear_mask) != 0;
   /* Clearing is a no-op when no bits intersect, so do it unconditionally. */
   *inout_mask &= ~clear_mask;
   return was_set;
}
207
/**
 * Convert \p value to signed fixed point with \p frac_bits fractional bits.
 * The float->int conversion truncates toward zero.
 */
static inline int
radv_float_to_sfixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);
   /* Shift an unsigned 1: "1 << 31" on a signed int is undefined behavior. */
   return value * (float)(1u << frac_bits);
}
213
/**
 * Convert \p value to unsigned fixed point with \p frac_bits fractional
 * bits. The float->int conversion truncates toward zero.
 */
static inline unsigned int
radv_float_to_ufixed(float value, unsigned frac_bits)
{
   assert(frac_bits < 32);
   /* Shift an unsigned 1: "1 << 31" on a signed int is undefined behavior. */
   return value * (float)(1u << frac_bits);
}
219
220 /* Whenever we generate an error, pass it through this function. Useful for
221 * debugging, where we can break on it. Only call at error site, not when
222 * propagating errors. Might be useful to plug in a stack trace here.
223 */
224
225 struct radv_image_view;
226 struct radv_instance;
227
228 void radv_loge(const char *format, ...) radv_printflike(1, 2);
229 void radv_loge_v(const char *format, va_list va);
230 void radv_logi(const char *format, ...) radv_printflike(1, 2);
231 void radv_logi_v(const char *format, va_list va);
232
233 /* A non-fatal assert. Useful for debugging. */
234 #ifdef NDEBUG
235 #define radv_assert(x) \
236 do { \
237 } while (0)
238 #else
239 #define radv_assert(x) \
240 do { \
241 if (unlikely(!(x))) \
242 fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
243 } while (0)
244 #endif
245
246 int radv_get_instance_entrypoint_index(const char *name);
247 int radv_get_device_entrypoint_index(const char *name);
248 int radv_get_physical_device_entrypoint_index(const char *name);
249
250 const char *radv_get_instance_entry_name(int index);
251 const char *radv_get_physical_device_entry_name(int index);
252 const char *radv_get_device_entry_name(int index);
253
/* queue types */
enum radv_queue_family {
   RADV_QUEUE_GENERAL,  /* graphics + compute */
   RADV_QUEUE_COMPUTE,  /* compute-only */
   RADV_QUEUE_TRANSFER, /* DMA/transfer */
   RADV_MAX_QUEUE_FAMILIES,
   /* Sentinel for foreign-queue ownership transfers; not a real HW queue. */
   RADV_QUEUE_FOREIGN = RADV_MAX_QUEUE_FAMILIES,
};
262
/**
 * Per-GPU state: winsys handle, chip information, UUIDs, feature toggles
 * and the memory-type / queue-family tables derived from them.
 */
struct radv_physical_device {
   struct vk_physical_device vk;

   /* Link in radv_instance::physical_devices */
   struct list_head link;

   struct radv_instance *instance;

   struct radeon_winsys *ws;
   struct radeon_info rad_info;
   char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   /* DRM file descriptors; master_fd is presumably optional (-1) — confirm at init site. */
   int local_fd;
   int master_fd;
   struct wsi_device wsi_device;

   bool out_of_order_rast_allowed;

   /* Whether DCC should be enabled for MSAA textures. */
   bool dcc_msaa_allowed;

   /* Whether to enable NGG. */
   bool use_ngg;

   /* Whether to enable NGG culling. */
   bool use_ngg_culling;

   /* Whether to enable NGG streamout. */
   bool use_ngg_streamout;

   /* Number of threads per wave. */
   uint8_t ps_wave_size;
   uint8_t cs_wave_size;
   uint8_t ge_wave_size;
   uint8_t rt_wave_size;

   /* Whether to use the LLVM compiler backend */
   bool use_llvm;

   /* Whether to emulate ETC2 image support on HW without support. */
   bool emulate_etc2;

   /* This is the drivers on-disk cache used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
   struct disk_cache *disk_cache;

   /* Memory layout exposed to the app, plus per-type BO domains/flags. */
   VkPhysicalDeviceMemoryProperties memory_properties;
   enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
   enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
   unsigned heaps;

#ifndef _WIN32
   int available_nodes;
   drmPciBusInfo bus_info;

   dev_t primary_devid;
   dev_t render_devid;
#endif

   nir_shader_compiler_options nir_options[MESA_VULKAN_SHADER_STAGES];

   /* Vulkan queue family index -> radv queue family; see vk_queue_to_radv(). */
   enum radv_queue_family vk_queue_to_radv[RADV_MAX_QUEUE_FAMILIES];
   uint32_t num_queues;
};
331
/** Driver instance: debug/perftest flags, driconf options and the GPU list. */
struct radv_instance {
   struct vk_instance vk;

   VkAllocationCallbacks alloc;

   /* RADV_DEBUG / RADV_PERFTEST bitmasks. */
   uint64_t debug_flags;
   uint64_t perftest_flags;

   /* Lazily-populated list of radv_physical_device (linked via ::link). */
   bool physical_devices_enumerated;
   struct list_head physical_devices;

   struct driOptionCache dri_options;
   struct driOptionCache available_dri_options;

   /**
    * Workarounds for game bugs.
    */
   bool enable_mrt_output_nan_fixup;
   bool disable_tc_compat_htile_in_general;
   bool disable_shrink_image_store;
   bool absolute_depth_bias;
   bool report_apu_as_dgpu;
   bool disable_htile_layers;
   bool disable_aniso_single_level;
   bool zero_vram;
};
358
359 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
360 void radv_finish_wsi(struct radv_physical_device *physical_device);
361
362 struct cache_entry;
363
/** VkPipelineCache backing store: a mutex-protected open hash table of cache entries. */
struct radv_pipeline_cache {
   struct vk_object_base base;
   struct radv_device *device;
   mtx_t mutex; /* guards the table; see radv_pipeline_cache_* functions */
   VkPipelineCacheCreateFlags flags;

   uint32_t total_size; /* serialized size in bytes */
   uint32_t table_size; /* allocated slot count */
   uint32_t kernel_count; /* occupied slot count */
   struct cache_entry **hash_table;
   bool modified; /* set when entries were added since load */

   VkAllocationCallbacks alloc;
};
378
379 struct radv_shader_binary;
380 struct radv_shader;
381 struct radv_pipeline_shader_stack_size;
382
383 void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
384 void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
385 bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
386
387 bool radv_create_shaders_from_pipeline_cache(
388 struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
389 struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
390 uint32_t *num_stack_sizes, bool *found_in_application_cache);
391
392 void radv_pipeline_cache_insert_shaders(
393 struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
394 struct radv_pipeline *pipeline, struct radv_shader_binary *const *binaries,
395 const struct radv_pipeline_shader_stack_size *stack_sizes, uint32_t num_stack_sizes);
396
397 VkResult radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
398 struct radv_shader_binary **binaries,
399 struct radv_shader_binary *gs_copy_binary);
400
/* Depth/stencil blit pipeline variants, keyed by whether the image layout
 * allows HTILE (tiled) access; see radv_meta_blit_ds_to_type(). */
enum radv_blit_ds_layout {
   RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
   RADV_BLIT_DS_LAYOUT_TILE_DISABLE, /* VK_IMAGE_LAYOUT_GENERAL */
   RADV_BLIT_DS_LAYOUT_COUNT,
};
406
407 static inline enum radv_blit_ds_layout
radv_meta_blit_ds_to_type(VkImageLayout layout)408 radv_meta_blit_ds_to_type(VkImageLayout layout)
409 {
410 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
411 : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
412 }
413
414 static inline VkImageLayout
radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)415 radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
416 {
417 return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
418 : VK_IMAGE_LAYOUT_GENERAL;
419 }
420
/* Destination-layout variants for meta render passes/pipelines. */
enum radv_meta_dst_layout {
   RADV_META_DST_LAYOUT_GENERAL, /* VK_IMAGE_LAYOUT_GENERAL */
   RADV_META_DST_LAYOUT_OPTIMAL, /* any other (optimal) layout */
   RADV_META_DST_LAYOUT_COUNT,
};
426
427 static inline enum radv_meta_dst_layout
radv_meta_dst_layout_from_layout(VkImageLayout layout)428 radv_meta_dst_layout_from_layout(VkImageLayout layout)
429 {
430 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
431 : RADV_META_DST_LAYOUT_OPTIMAL;
432 }
433
434 static inline VkImageLayout
radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)435 radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
436 {
437 return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
438 : VK_IMAGE_LAYOUT_GENERAL;
439 }
440
/**
 * State for all "meta" operations (clears, blits, copies, resolves,
 * decompression, query result handling, acceleration-structure builds, ...):
 * the pipelines, pipeline/descriptor-set layouts and render passes that
 * implement them, backed by the embedded pipeline cache.
 */
struct radv_meta_state {
   VkAllocationCallbacks alloc;

   struct radv_pipeline_cache cache;

   /*
    * For on-demand pipeline creation, makes sure that
    * only one thread tries to build a pipeline at the same time.
    */
   mtx_t mtx;

   /**
    * Use array element `i` for images with `2^i` samples.
    */
   struct {
      VkRenderPass render_pass[NUM_META_FS_KEYS];
      VkPipeline color_pipelines[NUM_META_FS_KEYS];
   } color_clear[MAX_SAMPLES_LOG2][MAX_RTS];

   /* Depth/stencil clear pipelines, also indexed by log2(samples). */
   struct {
      VkRenderPass depthstencil_rp;
      VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];

      VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
      VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
   } ds_clear[MAX_SAMPLES_LOG2];

   VkPipelineLayout clear_color_p_layout;
   VkPipelineLayout clear_depth_p_layout;
   VkPipelineLayout clear_depth_unrestricted_p_layout;

   /* Optimized compute fast HTILE clear for stencil or depth only. */
   VkPipeline clear_htile_mask_pipeline;
   VkPipelineLayout clear_htile_mask_p_layout;
   VkDescriptorSetLayout clear_htile_mask_ds_layout;

   /* Copy VRS into HTILE. */
   VkPipeline copy_vrs_htile_pipeline;
   VkPipelineLayout copy_vrs_htile_p_layout;
   VkDescriptorSetLayout copy_vrs_htile_ds_layout;

   /* Clear DCC with comp-to-single. */
   VkPipeline clear_dcc_comp_to_single_pipeline[2]; /* 0: 1x, 1: 2x/4x/8x */
   VkPipelineLayout clear_dcc_comp_to_single_p_layout;
   VkDescriptorSetLayout clear_dcc_comp_to_single_ds_layout;

   /* vkCmdBlitImage: one pipeline per source dimensionality and fs key. */
   struct {
      VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];

      /** Pipeline that blits from a 1D image. */
      VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 2D image. */
      VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];

      /** Pipeline that blits from a 3D image. */
      VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

      VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
      VkPipeline depth_only_1d_pipeline;
      VkPipeline depth_only_2d_pipeline;
      VkPipeline depth_only_3d_pipeline;

      VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
      VkPipeline stencil_only_1d_pipeline;
      VkPipeline stencil_only_2d_pipeline;
      VkPipeline stencil_only_3d_pipeline;
      VkPipelineLayout pipeline_layout;
      VkDescriptorSetLayout ds_layout;
   } blit;

   struct {
      VkPipelineLayout p_layouts[5];
      VkDescriptorSetLayout ds_layouts[5];
      VkPipeline pipelines[5][NUM_META_FS_KEYS];

      VkPipeline depth_only_pipeline[5];

      VkPipeline stencil_only_pipeline[5];
   } blit2d[MAX_SAMPLES_LOG2];

   VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
   VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
   VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];

   /* Copy helpers: image-to-buffer (itob), buffer-to-image (btoi),
    * image-to-image (itoi) and image clears (cleari), with dedicated
    * variants for 96-bit R32G32B32 formats. */
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } itob;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
      VkPipeline pipeline_3d;
   } btoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } btoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } itoi;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } itoi_r32g32b32;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
      VkPipeline pipeline_3d;
   } cleari;
   struct {
      VkPipelineLayout img_p_layout;
      VkDescriptorSetLayout img_ds_layout;
      VkPipeline pipeline;
   } cleari_r32g32b32;
   struct {
      VkPipelineLayout p_layout;
      VkDescriptorSetLayout ds_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_copy;

   /* MSAA resolves: graphics (resolve), compute and fragment variants. */
   struct {
      VkPipelineLayout p_layout;
      VkPipeline pipeline[NUM_META_FS_KEYS];
      VkRenderPass pass[NUM_META_FS_KEYS];
   } resolve;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      struct {
         VkPipeline pipeline;
         VkPipeline i_pipeline;
         VkPipeline srgb_pipeline;
      } rc[MAX_SAMPLES_LOG2];

      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_compute;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;

      struct {
         VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
         VkPipeline pipeline[NUM_META_FS_KEYS];
      } rc[MAX_SAMPLES_LOG2];

      VkRenderPass depth_render_pass;
      VkPipeline depth_zero_pipeline;
      struct {
         VkPipeline average_pipeline;
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } depth[MAX_SAMPLES_LOG2];

      VkRenderPass stencil_render_pass;
      VkPipeline stencil_zero_pipeline;
      struct {
         VkPipeline max_pipeline;
         VkPipeline min_pipeline;
      } stencil[MAX_SAMPLES_LOG2];
   } resolve_fragment;

   /* HTILE decompression / resummarization per sample count. */
   struct {
      VkPipelineLayout p_layout;
      VkPipeline decompress_pipeline;
      VkPipeline resummarize_pipeline;
      VkRenderPass pass;
   } depth_decomp[MAX_SAMPLES_LOG2];

   VkDescriptorSetLayout expand_depth_stencil_compute_ds_layout;
   VkPipelineLayout expand_depth_stencil_compute_p_layout;
   VkPipeline expand_depth_stencil_compute_pipeline;

   /* CMASK/FMASK/DCC color decompression. */
   struct {
      VkPipelineLayout p_layout;
      VkPipeline cmask_eliminate_pipeline;
      VkPipeline fmask_decompress_pipeline;
      VkPipeline dcc_decompress_pipeline;
      VkRenderPass pass;

      VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
      VkPipelineLayout dcc_decompress_compute_p_layout;
      VkPipeline dcc_decompress_compute_pipeline;
   } fast_clear_flush;

   /* vkCmdFillBuffer / vkCmdCopyBuffer compute paths. */
   struct {
      VkPipelineLayout fill_p_layout;
      VkPipelineLayout copy_p_layout;
      VkDescriptorSetLayout fill_ds_layout;
      VkDescriptorSetLayout copy_ds_layout;
      VkPipeline fill_pipeline;
      VkPipeline copy_pipeline;
   } buffer;

   /* Query-result copy shaders, one per query type. */
   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline occlusion_query_pipeline;
      VkPipeline pipeline_statistics_query_pipeline;
      VkPipeline tfb_query_pipeline;
      VkPipeline timestamp_query_pipeline;
   } query;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[MAX_SAMPLES_LOG2];
   } fmask_expand;

   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline[32];
   } dcc_retile;

   /* Ray-tracing acceleration structure build/copy shaders. */
   struct {
      VkPipelineLayout leaf_p_layout;
      VkPipeline leaf_pipeline;
      VkPipelineLayout internal_p_layout;
      VkPipeline internal_pipeline;
      VkPipelineLayout copy_p_layout;
      VkPipeline copy_pipeline;
   } accel_struct_build;

   /* ETC2 software decoding (see radv_physical_device::emulate_etc2). */
   struct {
      VkDescriptorSetLayout ds_layout;
      VkPipelineLayout p_layout;
      VkPipeline pipeline;
   } etc_decode;
};
696
697 #define RADV_NUM_HW_CTX (RADEON_CTX_PRIORITY_REALTIME + 1)
698
699 struct radv_deferred_queue_submission;
700
701 static inline enum radv_queue_family
vk_queue_to_radv(struct radv_physical_device * phys_dev,int queue_family_index)702 vk_queue_to_radv(struct radv_physical_device *phys_dev,
703 int queue_family_index)
704 {
705 assert(queue_family_index < RADV_MAX_QUEUE_FAMILIES);
706 return phys_dev->vk_queue_to_radv[queue_family_index];
707 }
708
709 enum ring_type radv_queue_family_to_ring(struct radv_physical_device *physical_device,
710 enum radv_queue_family f);
711
/**
 * A logical-device queue: its HW context plus the scratch/ring buffer
 * objects and preamble command streams associated with it.
 */
struct radv_queue {
   struct vk_queue vk;
   struct radv_device *device;
   struct radeon_winsys_ctx *hw_ctx;
   enum radeon_ctx_priority priority;

   enum radv_queue_family qf;
   /* Current scratch/ring sizes and feature flags backing this queue. */
   uint32_t scratch_size_per_wave;
   uint32_t scratch_waves;
   uint32_t compute_scratch_size_per_wave;
   uint32_t compute_scratch_waves;
   uint32_t esgs_ring_size;
   uint32_t gsvs_ring_size;
   bool has_tess_rings;
   bool has_gds;
   bool has_gds_oa;
   bool has_sample_positions;

   /* Buffer objects backing the sizes/flags above. */
   struct radeon_winsys_bo *scratch_bo;
   struct radeon_winsys_bo *descriptor_bo;
   struct radeon_winsys_bo *compute_scratch_bo;
   struct radeon_winsys_bo *esgs_ring_bo;
   struct radeon_winsys_bo *gsvs_ring_bo;
   struct radeon_winsys_bo *tess_rings_bo;
   struct radeon_winsys_bo *gds_bo;
   struct radeon_winsys_bo *gds_oa_bo;
   /* Preamble command streams for submissions on this queue. */
   struct radeon_cmdbuf *initial_preamble_cs;
   struct radeon_cmdbuf *initial_full_flush_preamble_cs;
   struct radeon_cmdbuf *continue_preamble_cs;
};
742
743 #define RADV_BORDER_COLOR_COUNT 4096
744 #define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
745
/* Pool of custom border colors: a GPU buffer of RADV_BORDER_COLOR_COUNT
 * slots plus a host-side occupancy bitmap. */
struct radv_device_border_color_data {
   bool used[RADV_BORDER_COLOR_COUNT]; /* slot i occupied? */

   struct radeon_winsys_bo *bo;
   VkClearColorValue *colors_gpu_ptr; /* CPU mapping of bo */

   /* Mutex is required to guarantee vkCreateSampler thread safety
    * given that we are writing to a buffer and checking color occupation */
   mtx_t mutex;
};
756
/* Fragment shading rates selectable via RADV_FORCE_VRS. */
enum radv_force_vrs {
   RADV_FORCE_VRS_1x1 = 0, /* no coarsening (default) */
   RADV_FORCE_VRS_2x2,
   RADV_FORCE_VRS_2x1,
   RADV_FORCE_VRS_1x2,
};
763
/* Background watcher thread state (used for RADV_FORCE_VRS; fd/watch
 * suggest an inotify-style file watch — confirm at the thread's entry point). */
struct radv_notifier {
   int fd;
   int watch;
   bool quit; /* set to ask the thread to exit */
   thrd_t thread;
};
770
/**
 * The logical device (VkDevice): queues, meta state, feature knobs copied
 * from the enabled features/environment, and device-lifetime GPU resources.
 */
struct radv_device {
   struct vk_device vk;

   struct radv_instance *instance;
   struct radeon_winsys *ws;

   /* One HW context per priority level actually used. */
   struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
   struct radv_meta_state meta_state;

   struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
   int queue_count[RADV_MAX_QUEUE_FAMILIES];

   bool pbb_allowed;
   uint32_t tess_offchip_block_dw_size;
   uint32_t scratch_waves;
   uint32_t dispatch_initiator;

   uint32_t gs_table_depth;

   /* MSAA sample locations.
    * The first index is the sample index.
    * The second index is the coordinate: X, Y. */
   float sample_locations_1x[1][2];
   float sample_locations_2x[2][2];
   float sample_locations_4x[4][2];
   float sample_locations_8x[8][2];

   /* GFX7 and later */
   uint32_t gfx_init_size_dw;
   struct radeon_winsys_bo *gfx_init;

   /* Debug trace buffer and its CPU mapping. */
   struct radeon_winsys_bo *trace_bo;
   uint32_t *trace_id_ptr;

   /* Whether to keep shader debug info, for debugging. */
   bool keep_shader_info;

   struct radv_physical_device *physical_device;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct radv_pipeline_cache *mem_cache;

   /*
    * use different counters so MSAA MRTs get consecutive surface indices,
    * even if MASK is allocated in between.
    */
   uint32_t image_mrt_offset_counter;
   uint32_t fmask_mrt_offset_counter;

   /* Shader memory allocator state, guarded by shader_arena_mutex. */
   struct list_head shader_arenas;
   unsigned shader_arena_shift;
   uint8_t shader_free_list_mask;
   struct list_head shader_free_lists[RADV_SHADER_ALLOC_NUM_FREE_LISTS];
   struct list_head shader_block_obj_pool;
   mtx_t shader_arena_mutex;

   /* For detecting VM faults reported by dmesg. */
   uint64_t dmesg_timestamp;

   /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
   bool robust_buffer_access;
   bool robust_buffer_access2;

   /* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
    * on some GFX10.3 chips.
    */
   bool adjust_frag_coord_z;

   /* Whether to inline the compute dispatch size in user sgprs. */
   bool load_grid_size_from_user_sgpr;

   /* Whether the driver uses a global BO list. */
   bool use_global_bo_list;

   /* Whether attachment VRS is enabled. */
   bool attachment_vrs_enabled;

   /* Whether shader image 32-bit float atomics are enabled. */
   bool image_float32_atomics;

   /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
   int force_aniso;

   struct radv_device_border_color_data border_color_data;

   /* Thread trace. */
   struct ac_thread_trace_data thread_trace;

   /* SPM. */
   struct ac_spm_trace_data spm_trace;

   /* Performance counters. */
   struct ac_perfcounters perfcounters;

   /* Trap handler. */
   struct radv_trap_handler_shader *trap_handler_shader;
   struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
   uint32_t *tma_ptr;

   /* Overallocation. */
   bool overallocation_disallowed;
   uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
   mtx_t overallocation_mutex;

   /* RADV_FORCE_VRS. */
   struct radv_notifier notifier;
   enum radv_force_vrs force_vrs;

   /* Depth image for VRS when not bound by the app. */
   struct {
      struct radv_image *image;
      struct radv_buffer *buffer; /* HTILE */
      struct radv_device_memory *mem;
   } vrs;

   /* Vertex-shader prolog cache, guarded by vs_prologs_lock. */
   struct u_rwlock vs_prologs_lock;
   struct hash_table *vs_prologs;

   /* Prime blit sdma queue */
   struct radv_queue *private_sdma_queue;

   struct radv_shader_prolog *simple_vs_prologs[MAX_VERTEX_ATTRIBS];
   struct radv_shader_prolog *instance_rate_vs_prologs[816];

   simple_mtx_t trace_mtx;

   /* Whether per-vertex VRS is forced. */
   bool force_vrs_enabled;
};
900
/** A VkDeviceMemory allocation wrapping a winsys buffer object. */
struct radv_device_memory {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   /* for dedicated allocations */
   struct radv_image *image;
   struct radv_buffer *buffer;
   uint32_t heap_index;
   uint64_t alloc_size;
   void *map;      /* CPU mapping, when mapped */
   void *user_ptr; /* host pointer for imported host allocations */

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   struct AHardwareBuffer *android_hardware_buffer;
#endif
};
916
917 void radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
918 struct radeon_winsys_bo *bo);
919 void radv_device_memory_finish(struct radv_device_memory *mem);
920
/* GPU address + size of one dynamic-descriptor buffer range. */
struct radv_descriptor_range {
   uint64_t va;
   uint32_t size;
};
925
/* Fixed-size part of a descriptor set; radv_descriptor_set appends a
 * variable-length BO array after it. */
struct radv_descriptor_set_header {
   struct vk_object_base base;
   struct radv_descriptor_set_layout *layout;
   uint32_t size;         /* bytes of descriptor memory */
   uint32_t buffer_count;

   struct radeon_winsys_bo *bo;
   uint64_t va;           /* GPU address of the set's descriptors */
   uint32_t *mapped_ptr;  /* CPU mapping of the same memory */
   struct radv_descriptor_range *dynamic_descriptors;
};
937
struct radv_descriptor_set {
   struct radv_descriptor_set_header header;

   /* Trailing array of referenced buffer objects (header.buffer_count). */
   struct radeon_winsys_bo *descriptors[];
};
943
/* Push-descriptor storage: a descriptor set plus the capacity (in bytes)
 * of its backing allocation. */
struct radv_push_descriptor_set {
   struct radv_descriptor_set_header set;
   uint32_t capacity;
};
948
/* One allocation record in a descriptor pool: its offset/size within the
 * pool's storage and the set occupying it. */
struct radv_descriptor_pool_entry {
   uint32_t offset;
   uint32_t size;
   struct radv_descriptor_set *set;
};
954
955 struct radv_descriptor_pool {
956 struct vk_object_base base;
957 struct radeon_winsys_bo *bo;
958 uint8_t *host_bo;
959 uint8_t *mapped_ptr;
960 uint64_t current_offset;
961 uint64_t size;
962
963 uint8_t *host_memory_base;
964 uint8_t *host_memory_ptr;
965 uint8_t *host_memory_end;
966
967 uint32_t entry_count;
968 uint32_t max_entry_count;
969 struct radv_descriptor_pool_entry entries[0];
970 };
971
/* One VkDescriptorUpdateTemplateEntry, pre-resolved into destination
 * offsets/strides within the set's mapped memory. */
struct radv_descriptor_update_template_entry {
   VkDescriptorType descriptor_type;

   /* The number of descriptors to update */
   uint32_t descriptor_count;

   /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
   uint32_t dst_offset;

   /* In dwords. Not valid/used for dynamic descriptors */
   uint32_t dst_stride;

   uint32_t buffer_offset;

   /* Only valid for combined image samplers and samplers */
   uint8_t has_sampler;
   uint8_t sampler_offset;

   /* In bytes */
   size_t src_offset;
   size_t src_stride;

   /* For push descriptors */
   const uint32_t *immutable_samplers;
};
997
998 struct radv_descriptor_update_template {
999 struct vk_object_base base;
1000 uint32_t entry_count;
1001 VkPipelineBindPoint bind_point;
1002 struct radv_descriptor_update_template_entry entry[0];
1003 };
1004
1005 void radv_descriptor_set_layout_destroy(struct radv_device *device,
1006 struct radv_descriptor_set_layout *set_layout);
1007
/* Take an additional reference on \p set_layout (atomic increment). */
static inline void
radv_descriptor_set_layout_ref(struct radv_descriptor_set_layout *set_layout)
{
   assert(set_layout && set_layout->ref_cnt >= 1);
   p_atomic_inc(&set_layout->ref_cnt);
}
1014
/* Drop a reference on \p set_layout; destroys it when the count reaches zero. */
static inline void
radv_descriptor_set_layout_unref(struct radv_device *device,
                                 struct radv_descriptor_set_layout *set_layout)
{
   assert(set_layout && set_layout->ref_cnt >= 1);
   if (p_atomic_dec_zero(&set_layout->ref_cnt))
      radv_descriptor_set_layout_destroy(device, set_layout);
}
1023
/** A VkBuffer; bo/offset are only valid once memory has been bound. */
struct radv_buffer {
   struct vk_object_base base;
   VkDeviceSize size;

   VkBufferUsageFlags usage;
   VkBufferCreateFlags flags;

   /* Set when bound */
   struct radeon_winsys_bo *bo;
   VkDeviceSize offset;
};
1035
1036 void radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
1037 struct radeon_winsys_bo *bo, uint64_t size, uint64_t offset);
1038 void radv_buffer_finish(struct radv_buffer *buffer);
1039
/* One bit per piece of dynamic state. Bit positions must stay in sync with
 * the RADV_CMD_DIRTY_DYNAMIC_* values in enum radv_cmd_dirty_bits. */
enum radv_dynamic_state_bits {
   RADV_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   /* Mask of all dynamic-state bits above. */
   RADV_DYNAMIC_ALL = (1ull << 30) - 1,
};
1073
/* Dirty-state tracking for command buffer recording. The low 30 bits
 * mirror enum radv_dynamic_state_bits one-to-one; the remaining bits
 * cover non-dynamic state that needs re-emission.
 */
enum radv_cmd_dirty_bits {
   /* Keep the dynamic state dirty bits in sync with
    * enum radv_dynamic_state_bits */
   RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
   RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
   RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
   RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
   RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
   RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
   RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
   RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
   RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
   RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
   RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
   RADV_CMD_DIRTY_DYNAMIC_PATCH_CONTROL_POINTS = 1ull << 23,
   RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1ull << 24,
   RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1ull << 25,
   RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1ull << 26,
   RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1ull << 27,
   RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE = 1ull << 28,
   RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT = 1ull << 29,
   /* Mask of all dynamic-state dirty bits. */
   RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 30) - 1,
   /* Non-dynamic state below. */
   RADV_CMD_DIRTY_PIPELINE = 1ull << 30,
   RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 31,
   RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 32,
   RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 33,
   RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 34,
};
1114
/* Pending cache-flush / synchronization operations accumulated while
 * recording, and emitted together by the cache-flush helpers below.
 */
enum radv_cmd_flush_bits {
   /* Instruction cache. */
   RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
   /* Scalar L1 cache. */
   RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
   /* Vector L1 cache. */
   RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
   /* L2 cache + L2 metadata cache writeback & invalidate.
    * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
   RADV_CMD_FLAG_INV_L2 = 1 << 3,
   /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
    * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
    * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
   RADV_CMD_FLAG_WB_L2 = 1 << 4,
   /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
    * changed and we want to read an image from shaders. */
   RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
   /* Framebuffer caches */
   RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
   RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
   RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
   /* Engine synchronization. */
   RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
   RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
   RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
   RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
   /* Pipeline query controls. */
   RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
   RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
   RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,

   /* Convenience mask: flush and invalidate all framebuffer caches. */
   RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
      (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
       RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
};
1151
/* One bound vertex-buffer slot. */
struct radv_vertex_binding {
   struct radv_buffer *buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
   VkDeviceSize stride;
};

/* One bound transform-feedback buffer slot. */
struct radv_streamout_binding {
   struct radv_buffer *buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};

/* Tracked streamout (transform feedback) enable state. */
struct radv_streamout_state {
   /* Mask of bound streamout buffers. */
   uint8_t enabled_mask;

   /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
   uint32_t hw_enabled_mask;

   /* State of VGT_STRMOUT_(CONFIG|EN) */
   bool streamout_enabled;
};

/* Current viewports plus their derived transforms
 * (cf. radv_get_viewport_xform()).
 */
struct radv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   struct {
      float scale[3];
      float translate[3];
   } xform[MAX_VIEWPORTS];
};

/* Current scissor rectangles. */
struct radv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Current VK_EXT_discard_rectangles state. */
struct radv_discard_rectangle_state {
   uint32_t count;
   VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};

/* Custom sample locations (VK_EXT_sample_locations). */
struct radv_sample_locations_state {
   VkSampleCountFlagBits per_pixel;
   VkExtent2D grid_size;
   uint32_t count;
   VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
};
1201
/* Snapshot of all dynamic pipeline state a command buffer tracks. */
struct radv_dynamic_state {
   /**
    * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint64_t mask;

   struct radv_viewport_state viewport;

   struct radv_scissor_state scissor;

   float line_width;

   struct {
      float bias;
      float clamp;
      float slope;
   } depth_bias;

   float blend_constants[4];

   struct {
      float min;
      float max;
   } depth_bounds;

   /* Per-face stencil masks/references below. */
   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } front;

      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } back;
   } stencil_op;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   struct radv_discard_rectangle_state discard_rectangle;

   struct radv_sample_locations_state sample_location;

   struct {
      uint32_t factor;
      uint16_t pattern;
   } line_stipple;

   VkCullModeFlags cull_mode;
   VkFrontFace front_face;
   unsigned primitive_topology;

   bool depth_test_enable;
   bool depth_write_enable;
   VkCompareOp depth_compare_op;
   bool depth_bounds_test_enable;
   bool stencil_test_enable;

   struct {
      VkExtent2D size;
      VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
   } fragment_shading_rate;

   bool depth_bias_enable;
   bool primitive_restart_enable;
   bool rasterizer_discard_enable;

   unsigned logic_op;

   /* Per-attachment color write enable bits. */
   uint32_t color_write_enable;
};
1291
/* Dynamic state used when the application sets none. */
extern const struct radv_dynamic_state default_dynamic_state;

/* Lookup helpers for driver debug/perftest option names and values. */
const char *radv_get_debug_option_name(int id);

const char *radv_get_perftest_option_name(int id);

int radv_get_int_debug_option(const char *name, int default_value);
1299
/* Pre-packed values for the hardware color-buffer (CB_*) registers of one
 * color attachment, computed by radv_initialise_color_surface().
 */
struct radv_color_buffer_info {
   uint64_t cb_color_base;
   uint64_t cb_color_cmask;
   uint64_t cb_color_fmask;
   uint64_t cb_dcc_base;
   uint32_t cb_color_slice;
   uint32_t cb_color_view;
   uint32_t cb_color_info;
   uint32_t cb_color_attrib;
   uint32_t cb_color_attrib2; /* GFX9 and later */
   uint32_t cb_color_attrib3; /* GFX10 and later */
   uint32_t cb_dcc_control;
   uint32_t cb_color_cmask_slice;
   uint32_t cb_color_fmask_slice;
   union {
      uint32_t cb_color_pitch; // GFX6-GFX8
      uint32_t cb_mrt_epitch;  // GFX9+
   };
};

/* Pre-packed values for the depth/stencil-buffer (DB_*) registers,
 * computed by radv_initialise_ds_surface() or radv_initialise_vrs_surface().
 */
struct radv_ds_buffer_info {
   uint64_t db_z_read_base;
   uint64_t db_stencil_read_base;
   uint64_t db_z_write_base;
   uint64_t db_stencil_write_base;
   uint64_t db_htile_data_base;
   uint32_t db_depth_info;
   uint32_t db_z_info;
   uint32_t db_stencil_info;
   uint32_t db_depth_view;
   uint32_t db_depth_size;
   uint32_t db_depth_slice;
   uint32_t db_htile_surface;
   uint32_t pa_su_poly_offset_db_fmt_cntl;
   uint32_t db_z_info2;       /* GFX9 only */
   uint32_t db_stencil_info2; /* GFX9 only */
};

/* Fill the register structs above from an image view. */
void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
                                   struct radv_image_view *iview);
void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
                                struct radv_image_view *iview);
void radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
                                 struct radv_ds_buffer_info *ds);
1344
1345 /**
1346 * Attachment state when recording a renderpass instance.
1347 *
1348 * The clear value is valid only if there exists a pending clear.
1349 */
1350 struct radv_attachment_state {
1351 VkImageAspectFlags pending_clear_aspects;
1352 uint32_t cleared_views;
1353 VkClearValue clear_value;
1354 VkImageLayout current_layout;
1355 VkImageLayout current_stencil_layout;
1356 bool current_in_render_loop;
1357 bool disable_dcc;
1358 struct radv_sample_locations_state sample_location;
1359
1360 union {
1361 struct radv_color_buffer_info cb;
1362 struct radv_ds_buffer_info ds;
1363 };
1364 struct radv_image_view *iview;
1365 };
1366
1367 struct radv_descriptor_state {
1368 struct radv_descriptor_set *sets[MAX_SETS];
1369 uint32_t dirty;
1370 uint32_t valid;
1371 struct radv_push_descriptor_set push_set;
1372 bool push_dirty;
1373 uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
1374 };
1375
1376 struct radv_subpass_sample_locs_state {
1377 uint32_t subpass_idx;
1378 struct radv_sample_locations_state sample_location;
1379 };
1380
/* Flush/invalidate events recorded for SQTT so the Radeon GPU Profiler
 * (RGP) can display what each cache-flush did
 * (cf. radv_cmd_state::sqtt_flush_bits and si_cs_emit_cache_flush()).
 */
enum rgp_flush_bits {
   RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
   RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
   RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
   RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
   RGP_FLUSH_PFP_SYNC_ME = 0x10,
   RGP_FLUSH_SYNC_CP_DMA = 0x20,
   RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
   RGP_FLUSH_INVAL_ICACHE = 0x80,
   RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
   RGP_FLUSH_FLUSH_L2 = 0x200,
   RGP_FLUSH_INVAL_L2 = 0x400,
   RGP_FLUSH_FLUSH_CB = 0x800,
   RGP_FLUSH_INVAL_CB = 0x1000,
   RGP_FLUSH_FLUSH_DB = 0x2000,
   RGP_FLUSH_INVAL_DB = 0x4000,
   RGP_FLUSH_INVAL_L1 = 0x8000,
};
1399
/* All mutable state tracked while recording a command buffer: bound
 * objects, dirty bits, pending flushes, and last-emitted values used to
 * skip redundant register writes.
 */
struct radv_cmd_state {
   /* Vertex descriptors */
   uint64_t vb_va;

   bool predicating;
   uint64_t dirty; /* mask of radv_cmd_dirty_bits */

   uint32_t prefetch_L2_mask;

   /* Currently bound and last-emitted pipelines per bind point. */
   struct radv_pipeline *pipeline;
   struct radv_pipeline *emitted_pipeline;
   struct radv_pipeline *compute_pipeline;
   struct radv_pipeline *emitted_compute_pipeline;
   struct radv_pipeline *rt_pipeline; /* emitted = emitted_compute_pipeline */
   struct radv_framebuffer *framebuffer;
   struct radv_render_pass *pass;
   const struct radv_subpass *subpass;
   struct radv_dynamic_state dynamic;
   struct radv_vs_input_state dynamic_vs_input;
   struct radv_attachment_state *attachments;
   struct radv_streamout_state streamout;
   VkRect2D render_area;

   uint32_t num_subpass_sample_locs;
   struct radv_subpass_sample_locs_state *subpass_sample_locs;

   /* Index buffer */
   struct radv_buffer *index_buffer;
   uint64_t index_offset;
   uint32_t index_type;
   uint32_t max_index_count;
   uint64_t index_va;
   int32_t last_index_type;

   int32_t last_primitive_reset_en;
   uint32_t last_primitive_reset_index;
   enum radv_cmd_flush_bits flush_bits; /* flushes to emit before the next draw/dispatch */
   unsigned active_occlusion_queries;
   bool perfect_occlusion_queries_enabled;
   unsigned active_pipeline_queries;
   unsigned active_pipeline_gds_queries;
   uint32_t trace_id;
   uint32_t last_ia_multi_vgt_param;

   /* Last-emitted draw parameters, used to elide redundant state. */
   uint32_t last_num_instances;
   uint32_t last_first_instance;
   uint32_t last_vertex_offset;
   uint32_t last_drawid;

   uint32_t last_sx_ps_downconvert;
   uint32_t last_sx_blend_opt_epsilon;
   uint32_t last_sx_blend_opt_control;

   /* Whether CP DMA is busy/idle. */
   bool dma_is_busy;

   /* Whether any images that are not L2 coherent are dirty from the CB. */
   bool rb_noncoherent_dirty;

   /* Conditional rendering info. */
   uint8_t predication_op; /* 32-bit or 64-bit predicate value */
   int predication_type;   /* -1: disabled, 0: normal, 1: inverted */
   uint64_t predication_va;

   /* Inheritance info. */
   VkQueryPipelineStatisticFlags inherited_pipeline_statistics;

   bool context_roll_without_scissor_emitted;

   /* SQTT related state. */
   uint32_t current_event_type;
   uint32_t num_events;
   uint32_t num_layout_transitions;
   bool pending_sqtt_barrier_end;
   enum rgp_flush_bits sqtt_flush_bits;

   /* NGG culling state. */
   uint32_t last_nggc_settings;
   int8_t last_nggc_settings_sgpr_idx;
   bool last_nggc_skip;

   /* Mesh shading state. */
   bool mesh_shading;

   uint8_t cb_mip[MAX_RTS];

   /* Whether DRAW_{INDEX}_INDIRECT_MULTI is emitted. */
   bool uses_draw_indirect_multi;

   uint32_t rt_stack_size;

   /* Last-emitted vertex shader prolog and its lookup key. */
   struct radv_shader_prolog *emitted_vs_prolog;
   uint32_t *emitted_vs_prolog_key;
   uint32_t emitted_vs_prolog_key_hash;
   uint32_t vbo_misaligned_mask;
   uint32_t vbo_bound_mask;

   /* Whether the cmdbuffer owns the current render pass rather than the app. */
   bool own_render_pass;

   /* Per-vertex VRS state. */
   uint32_t last_vrs_rates;
   int8_t last_vrs_rates_sgpr_idx;
};
1504
/* Command pool: tracks both live and recycled command buffers. */
struct radv_cmd_pool {
   struct vk_command_pool vk;
   struct list_head cmd_buffers;      /* allocated command buffers */
   struct list_head free_cmd_buffers; /* freed buffers kept for reuse */
};

/* One mapped upload BO used for per-cmdbuffer transient data
 * (cf. radv_cmd_buffer_upload_alloc/upload_data).
 */
struct radv_cmd_buffer_upload {
   uint8_t *map;    /* CPU mapping of upload_bo */
   unsigned offset; /* current allocation offset */
   uint64_t size;
   struct radeon_winsys_bo *upload_bo;
   struct list_head list;
};

/* Command buffer lifecycle, mirroring the Vulkan spec's state machine. */
enum radv_cmd_buffer_status {
   RADV_CMD_BUFFER_STATUS_INVALID,
   RADV_CMD_BUFFER_STATUS_INITIAL,
   RADV_CMD_BUFFER_STATUS_RECORDING,
   RADV_CMD_BUFFER_STATUS_EXECUTABLE,
   RADV_CMD_BUFFER_STATUS_PENDING,
};
1526
/* Driver object backing a VkCommandBuffer. */
struct radv_cmd_buffer {
   struct vk_command_buffer vk;

   struct radv_device *device;

   struct radv_cmd_pool *pool;
   struct list_head pool_link; /* link in pool->cmd_buffers / free_cmd_buffers */

   VkCommandBufferUsageFlags usage_flags;
   enum radv_cmd_buffer_status status;
   struct radeon_cmdbuf *cs; /* the hardware command stream being built */
   struct radv_cmd_state state;
   struct radv_vertex_binding vertex_bindings[MAX_VBS];
   struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
   enum radv_queue_family qf;

   uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
   VkShaderStageFlags push_constant_stages;
   struct radv_descriptor_set_header meta_push_descriptors;

   struct radv_descriptor_state descriptors[MAX_BIND_POINTS];

   struct radv_cmd_buffer_upload upload;

   /* Resource sizes requested by recorded commands, resolved at submit. */
   uint32_t scratch_size_per_wave_needed;
   uint32_t scratch_waves_wanted;
   uint32_t compute_scratch_size_per_wave_needed;
   uint32_t compute_scratch_waves_wanted;
   uint32_t esgs_ring_size_needed;
   uint32_t gsvs_ring_size_needed;
   bool tess_rings_needed;
   bool gds_needed;    /* for GFX10 streamout and NGG GS queries */
   bool gds_oa_needed; /* for GFX10 streamout */
   bool sample_positions_needed;

   /* First error encountered while recording, returned at vkEndCommandBuffer. */
   VkResult record_result;

   uint64_t gfx9_fence_va;
   uint32_t gfx9_fence_idx;
   uint64_t gfx9_eop_bug_va;

   /**
    * Whether a query pool has been reset and we have to flush caches.
    */
   bool pending_reset_query;

   /**
    * Bitmask of pending active query flushes.
    */
   enum radv_cmd_flush_bits active_query_flush_bits;
};
1578
struct radv_image;
struct radv_image_view;

bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);

/* Initial graphics/compute register state emission. */
void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);

void cik_create_gfx_config(struct radv_device *device);

void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
                       const VkViewport *viewports, bool can_use_guardband);
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
                                   bool indirect_draw, bool count_from_stream_output,
                                   uint32_t draw_vertex_count, unsigned topology,
                                   bool prim_restart_enable);
void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
                                unsigned event, unsigned event_flags, unsigned dst_sel,
                                unsigned data_sel, uint64_t va, uint32_t new_fence,
                                uint64_t gfx9_eop_bug_va);

/* Cache flushing and predication. */
void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
                      uint32_t mask);
void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
                            uint32_t *fence_ptr, uint64_t va, bool is_mec,
                            enum radv_cmd_flush_bits flush_bits,
                            enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
                                   unsigned pred_op, uint64_t va);

/* CP DMA helpers. */
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
                           uint64_t size);
void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
                            unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);

void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);

/* Vertex-shader prolog cache helpers. */
unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs);
uint32_t radv_hash_vs_prolog(const void *key_);
bool radv_cmp_vs_prolog(const void *a_, const void *b_);
1621
/* Transient upload allocations inside a command buffer. */
bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                  unsigned *out_offset, void **ptr);
void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
                                 const struct radv_subpass *subpass);
void radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
                                     const struct radv_subpass *subpass);
bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
                                 const void *data, unsigned *out_offset);

/* Subpass clears and resolves (compute and fragment-shader paths). */
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlags aspects,
                                           VkResolveModeFlagBits resolve_mode);
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
                                           VkImageAspectFlags aspects,
                                           VkResolveModeFlagBits resolve_mode);
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
VkResult radv_device_init_vrs_state(struct radv_device *device);

/* Clear-value and compression metadata updates. */
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
                                   const struct radv_image_view *iview,
                                   VkClearDepthStencilValue ds_clear_value,
                                   VkImageAspectFlags aspects);

void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
                                      const struct radv_image_view *iview, int cb_idx,
                                      uint32_t color_values[2]);

bool radv_image_use_dcc_image_stores(const struct radv_device *device,
                                     const struct radv_image *image);
bool radv_image_use_dcc_predication(const struct radv_device *device,
                                    const struct radv_image *image);

void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, bool value);

void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, bool value);

/* Translate Vulkan access masks into the flushes the hardware needs. */
enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
                                               VkAccessFlags2KHR src_flags,
                                               const struct radv_image *image);
enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
                                               VkAccessFlags2KHR dst_flags,
                                               const struct radv_image *image);
uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                          struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
                          uint32_t value);
void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
                      struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
                      uint64_t size);

void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                      struct radv_device_memory *mem);
1682
/* Emit the SET_SH_REG packet header for writing `pointer_count` shader
 * pointers starting at `sh_offset`. Each 32-bit pointer occupies one dword,
 * each 64-bit pointer two.
 */
static inline void
radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
                              bool use_32bit_pointers)
{
   radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
   radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2); /* register offset in dwords */
}
1690
/* Emit the payload of a shader pointer: the low 32 bits of `va` always,
 * plus the high 32 bits for 64-bit pointers. For 32-bit pointers only the
 * low half is emitted, so the high half must match the fixed address32_hi
 * the device was configured with (checked by the assert).
 */
static inline void
radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
                              bool use_32bit_pointers)
{
   radeon_emit(cs, va);

   if (use_32bit_pointers) {
      assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
   } else {
      radeon_emit(cs, va >> 32);
   }
}
1703
1704 static inline void
radv_emit_shader_pointer(struct radv_device * device,struct radeon_cmdbuf * cs,uint32_t sh_offset,uint64_t va,bool global)1705 radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
1706 uint64_t va, bool global)
1707 {
1708 bool use_32bit_pointers = !global;
1709
1710 radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
1711 radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
1712 }
1713
1714 static inline struct radv_descriptor_state *
radv_get_descriptors_state(struct radv_cmd_buffer * cmd_buffer,VkPipelineBindPoint bind_point)1715 radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
1716 {
1717 switch (bind_point) {
1718 case VK_PIPELINE_BIND_POINT_GRAPHICS:
1719 case VK_PIPELINE_BIND_POINT_COMPUTE:
1720 return &cmd_buffer->descriptors[bind_point];
1721 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1722 return &cmd_buffer->descriptors[2];
1723 default:
1724 unreachable("Unhandled bind point");
1725 }
1726 }
1727
/* Compute the scale/translate viewport transform for one VkViewport. */
void
radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3]);

/*
 * Takes x,y,z as exact numbers of invocations, instead of blocks.
 *
 * Limitations: Can't call normal dispatch functions without binding or rebinding
 * the compute pipeline.
 */
void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
                             uint32_t z);

void radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo,
                            uint64_t va);
1742
/* Driver object backing a VkEvent: a mapped BO holding the event value. */
struct radv_event {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;
   uint64_t *map; /* CPU mapping of bo */
};

/* Flags mixed into shader hashes so differently-compiled variants get
 * distinct cache keys (cf. radv_get_hash_flags()).
 */
#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
#define RADV_HASH_SHADER_LLVM (1 << 4)
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16)
#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17)
#define RADV_HASH_SHADER_RT_WAVE64 (1 << 18)

struct radv_pipeline_key;

void radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
                       const struct radv_pipeline_layout *layout,
                       const struct radv_pipeline_key *key, uint32_t flags);

void radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                          uint32_t flags);

uint32_t radv_get_hash_flags(const struct radv_device *device, bool stats);

bool radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo);

#define RADV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

/* Iterate `stage` over every shader stage whose bit is set in `stage_bits`. */
#define radv_foreach_stage(stage, stage_bits)                                                      \
   for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK);            \
        stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
1782
1783 struct radv_multisample_state {
1784 uint32_t db_eqaa;
1785 uint32_t pa_sc_mode_cntl_0;
1786 uint32_t pa_sc_mode_cntl_1;
1787 uint32_t pa_sc_aa_config;
1788 uint32_t pa_sc_aa_mask[2];
1789 unsigned num_samples;
1790 };
1791
1792 struct radv_vrs_state {
1793 uint32_t pa_cl_vrs_cntl;
1794 };
1795
1796 struct radv_prim_vertex_count {
1797 uint8_t min;
1798 uint8_t incr;
1799 };
1800
1801 struct radv_ia_multi_vgt_param_helpers {
1802 uint32_t base;
1803 bool partial_es_wave;
1804 uint8_t primgroup_size;
1805 bool ia_switch_on_eoi;
1806 bool partial_vs_wave;
1807 };
1808
1809 struct radv_binning_state {
1810 uint32_t pa_sc_binner_cntl_0;
1811 };
1812
#define SI_GS_PER_ES 128

enum radv_pipeline_type {
   RADV_PIPELINE_GRAPHICS,
   /* Compute pipeline (incl raytracing pipeline) */
   RADV_PIPELINE_COMPUTE,
   /* Pipeline library. This can't actually run and merely is a partial pipeline. */
   RADV_PIPELINE_LIBRARY
};

/* Ray-tracing shader group handle, as returned to the application. */
struct radv_pipeline_group_handle {
   uint32_t handles[2];
};

/* Per-shader scratch requirements for a ray-tracing stage. */
struct radv_pipeline_shader_stack_size {
   uint32_t recursive_size;
   /* anyhit + intersection */
   uint32_t non_recursive_size;
};

/* Reference-counted shader memory allocation shared by a pipeline. */
struct radv_pipeline_slab {
   uint32_t ref_count;

   union radv_shader_arena_block *alloc;
};

void radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab);
1840
/* Driver object backing a VkPipeline. The union at the bottom holds
 * type-specific state selected by `type`.
 */
struct radv_pipeline {
   struct vk_object_base base;
   enum radv_pipeline_type type;

   struct radv_device *device;
   struct radv_dynamic_state dynamic_state;

   struct radv_pipeline_slab *slab;

   bool need_indirect_descriptor_sets;
   struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
   struct radv_shader *gs_copy_shader;
   VkShaderStageFlags active_stages;

   /* Pre-recorded command streams replayed at bind time; ctx_cs holds the
    * context-register portion and is hashed for state deduplication. */
   struct radeon_cmdbuf cs;
   uint32_t ctx_cs_hash;
   struct radeon_cmdbuf ctx_cs;

   uint32_t binding_stride[MAX_VBS];

   /* Vertex-input layout derived at pipeline creation. */
   uint8_t attrib_bindings[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_ends[MAX_VERTEX_ATTRIBS];
   uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];

   bool use_per_attribute_vb_descs;
   bool can_use_simple_input;
   uint8_t last_vertex_attrib_bit;
   uint8_t next_vertex_stage : 8;
   uint32_t vb_desc_usage_mask;
   uint32_t vb_desc_alloc_size;

   uint32_t user_data_0[MESA_VULKAN_SHADER_STAGES];
   union {
      struct {
         struct radv_multisample_state ms;
         struct radv_binning_state binning;
         struct radv_vrs_state vrs;
         uint32_t spi_baryc_cntl;
         unsigned esgs_ring_size;
         unsigned gsvs_ring_size;
         uint32_t vtx_base_sgpr;
         struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
         uint8_t vtx_emit_num;
         bool uses_drawid;
         bool uses_baseinstance;
         bool can_use_guardband;
         uint64_t needed_dynamic_state;
         bool disable_out_of_order_rast_for_occlusion;
         unsigned tess_patch_control_points;
         unsigned pa_su_sc_mode_cntl;
         unsigned db_depth_control;
         unsigned pa_cl_clip_cntl;
         unsigned cb_color_control;
         bool uses_dynamic_stride;
         bool uses_conservative_overestimate;
         bool negative_one_to_one;

         /* Used for rbplus */
         uint32_t col_format;
         uint32_t cb_target_mask;

         /* Whether the pipeline uses NGG (GFX10+). */
         bool is_ngg;
         bool has_ngg_culling;

         /* Last pre-PS API stage */
         gl_shader_stage last_vgt_api_stage;

         /* Whether the pipeline forces per-vertex VRS (GFX10.3+). */
         bool force_vrs_per_vertex;
      } graphics;
      struct {
         struct radv_pipeline_group_handle *rt_group_handles;
         struct radv_pipeline_shader_stack_size *rt_stack_sizes;
         bool dynamic_stack_size;
         uint32_t group_count;
         bool cs_regalloc_hang_bug;
      } compute;
      struct {
         unsigned stage_count;
         VkPipelineShaderStageCreateInfo *stages;
         unsigned group_count;
         VkRayTracingShaderGroupCreateInfoKHR *groups;
      } library;
   };

   unsigned max_waves;
   unsigned scratch_bytes_per_wave;

   /* Not NULL if graphics pipeline uses streamout. */
   struct radv_shader *streamout_shader;

   /* Unique pipeline hash identifier. */
   uint64_t pipeline_hash;

   /* Pipeline layout info. */
   uint32_t push_constant_size;
   uint32_t dynamic_offset_count;
};
1940
1941 static inline bool
radv_pipeline_has_gs(const struct radv_pipeline * pipeline)1942 radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
1943 {
1944 return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
1945 }
1946
1947 static inline bool
radv_pipeline_has_tess(const struct radv_pipeline * pipeline)1948 radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
1949 {
1950 return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
1951 }
1952
1953 static inline bool
radv_pipeline_has_mesh(const struct radv_pipeline * pipeline)1954 radv_pipeline_has_mesh(const struct radv_pipeline *pipeline)
1955 {
1956 return !!pipeline->shaders[MESA_SHADER_MESH];
1957 }
1958
bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline);

bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);

struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
                                                 gl_shader_stage stage, int idx);

struct radv_shader *radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage);

/* Extra, driver-internal options for graphics pipeline creation, used by
 * the meta (clear/resolve/blit) paths.
 */
struct radv_graphics_pipeline_create_info {
   bool use_rectlist;
   bool db_depth_clear;
   bool db_stencil_clear;
   bool depth_compress_disable;
   bool stencil_compress_disable;
   bool resummarize_enable;
   uint32_t custom_blend_mode;
};

VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
                                       const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                       const struct radv_graphics_pipeline_create_info *extra,
                                       const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);

VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                                      const VkComputePipelineCreateInfo *pCreateInfo,
                                      const VkAllocationCallbacks *pAllocator,
                                      const uint8_t *custom_hash,
                                      struct radv_pipeline_shader_stack_size *rt_stack_sizes,
                                      uint32_t rt_group_count, VkPipeline *pPipeline);

void radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                           const VkAllocationCallbacks *allocator);
1992
/* Primitive binning parameters; see radv_get_binning_settings(). */
struct radv_binning_settings {
   unsigned context_states_per_bin;    /* allowed range: [1, 6] */
   unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
   unsigned fpovs_per_batch;           /* allowed range: [0, 255], 0 = unlimited */
};
1998
1999 struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
2000
2001 struct vk_format_description;
2002 uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
2003 int first_non_void);
2004 uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
2005 int first_non_void);
2006 bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
2007 void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
2008 const struct util_format_description *desc, unsigned *dfmt,
2009 unsigned *nfmt, bool *post_shuffle,
2010 enum radv_vs_input_alpha_adjust *alpha_adjust);
2011 uint32_t radv_translate_colorformat(VkFormat format);
2012 uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
2013 int first_non_void);
2014 uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
2015 unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
2016 uint32_t radv_translate_dbformat(VkFormat format);
2017 uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
2018 int first_non_void);
2019 uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
2020 int first_non_void);
2021 bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
2022 VkClearColorValue *value);
2023 bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
2024 VkFormat format);
2025 bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
2026 VkFormat format, bool *blendable);
2027 bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2, bool *sign_reinterpret);
2028 bool radv_is_atomic_format_supported(VkFormat format);
2029 bool radv_device_supports_etc(struct radv_physical_device *physical_device);
2030
/* One plane of a (possibly multi-planar) image: its format and addrlib
 * surface layout. */
struct radv_image_plane {
   VkFormat format;
   struct radeon_surf surface;
};
2035
/* Driver representation of a VkImage: per-plane surface layout plus the
 * offsets of driver-managed metadata inside the bound memory. */
struct radv_image {
   struct vk_object_base base;
   VkImageType type;
   /* The original VkFormat provided by the client. This may not match any
    * of the actual surface formats.
    */
   VkFormat vk_format;
   VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
   struct ac_surf_info info;
   VkImageTiling tiling;     /** VkImageCreateInfo::tiling */
   VkImageCreateFlags flags; /** VkImageCreateInfo::flags */

   VkDeviceSize size;  /* Total memory size required for the image. */
   uint32_t alignment; /* Required memory alignment in bytes. */

   unsigned queue_family_mask;
   bool exclusive;
   bool shareable;
   bool l2_coherent;
   bool dcc_sign_reinterpret;
   bool support_comp_to_single;

   /* Set when bound */
   struct radeon_winsys_bo *bo; /* Backing buffer object. */
   VkDeviceSize offset;         /* Byte offset of the image within 'bo'. */
   bool tc_compatible_cmask;

   /* Offsets (relative to the image) of clear/predicate metadata; 0 means
    * not allocated (see radv_image_has_clear_value() etc.). */
   uint64_t clear_value_offset; /* Fast-clear values, 8 bytes per level. */
   uint64_t fce_pred_offset;    /* Fast-clear-eliminate predicate, 8 bytes per level. */
   uint64_t dcc_pred_offset;    /* DCC decompress predicate, 8 bytes per level. */

   /*
    * Metadata for the TC-compat zrange workaround. If the 32-bit value
    * stored at this offset is UINT_MAX, the driver will emit
    * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
    * SET_CONTEXT_REG packet.
    */
   uint64_t tc_compat_zrange_offset;

   /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
   VkDeviceMemory owned_memory;

   unsigned plane_count;              /* Number of valid entries in 'planes'. */
   struct radv_image_plane planes[0]; /* Trailing per-plane data. */
};
2081
2082 /* Whether the image has a htile that is known consistent with the contents of
2083 * the image and is allowed to be in compressed form.
2084 *
2085 * If this is false reads that don't use the htile should be able to return
2086 * correct results.
2087 */
2088 bool radv_layout_is_htile_compressed(const struct radv_device *device,
2089 const struct radv_image *image, VkImageLayout layout,
2090 bool in_render_loop, unsigned queue_mask);
2091
2092 bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2093 unsigned level, VkImageLayout layout, bool in_render_loop,
2094 unsigned queue_mask);
2095
2096 bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2097 unsigned level, VkImageLayout layout, bool in_render_loop,
2098 unsigned queue_mask);
2099
2100 bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2101 VkImageLayout layout, unsigned queue_mask);
2102
2103 /**
2104 * Return whether the image has CMASK metadata for color surfaces.
2105 */
2106 static inline bool
radv_image_has_cmask(const struct radv_image * image)2107 radv_image_has_cmask(const struct radv_image *image)
2108 {
2109 return image->planes[0].surface.cmask_offset;
2110 }
2111
2112 /**
2113 * Return whether the image has FMASK metadata for color surfaces.
2114 */
2115 static inline bool
radv_image_has_fmask(const struct radv_image * image)2116 radv_image_has_fmask(const struct radv_image *image)
2117 {
2118 return image->planes[0].surface.fmask_offset;
2119 }
2120
2121 /**
2122 * Return whether the image has DCC metadata for color surfaces.
2123 */
2124 static inline bool
radv_image_has_dcc(const struct radv_image * image)2125 radv_image_has_dcc(const struct radv_image *image)
2126 {
2127 return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
2128 image->planes[0].surface.meta_offset;
2129 }
2130
2131 /**
2132 * Return whether the image is TC-compatible CMASK.
2133 */
2134 static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image * image)2135 radv_image_is_tc_compat_cmask(const struct radv_image *image)
2136 {
2137 return radv_image_has_fmask(image) && image->tc_compatible_cmask;
2138 }
2139
2140 /**
2141 * Return whether DCC metadata is enabled for a level.
2142 */
2143 static inline bool
radv_dcc_enabled(const struct radv_image * image,unsigned level)2144 radv_dcc_enabled(const struct radv_image *image, unsigned level)
2145 {
2146 return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels;
2147 }
2148
2149 /**
2150 * Return whether the image has CB metadata.
2151 */
2152 static inline bool
radv_image_has_CB_metadata(const struct radv_image * image)2153 radv_image_has_CB_metadata(const struct radv_image *image)
2154 {
2155 return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image);
2156 }
2157
2158 /**
2159 * Return whether the image has HTILE metadata for depth surfaces.
2160 */
2161 static inline bool
radv_image_has_htile(const struct radv_image * image)2162 radv_image_has_htile(const struct radv_image *image)
2163 {
2164 return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER &&
2165 image->planes[0].surface.meta_size;
2166 }
2167
2168 /**
2169 * Return whether the image has VRS HTILE metadata for depth surfaces
2170 */
2171 static inline bool
radv_image_has_vrs_htile(const struct radv_device * device,const struct radv_image * image)2172 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
2173 {
2174 /* Any depth buffer can potentially use VRS. */
2175 return device->attachment_vrs_enabled && radv_image_has_htile(image) &&
2176 (image->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
2177 }
2178
2179 /**
2180 * Return whether HTILE metadata is enabled for a level.
2181 */
2182 static inline bool
radv_htile_enabled(const struct radv_image * image,unsigned level)2183 radv_htile_enabled(const struct radv_image *image, unsigned level)
2184 {
2185 return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels;
2186 }
2187
2188 /**
2189 * Return whether the image is TC-compatible HTILE.
2190 */
2191 static inline bool
radv_image_is_tc_compat_htile(const struct radv_image * image)2192 radv_image_is_tc_compat_htile(const struct radv_image *image)
2193 {
2194 return radv_image_has_htile(image) &&
2195 (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
2196 }
2197
2198 /**
2199 * Return whether the entire HTILE buffer can be used for depth in order to
2200 * improve HiZ Z-Range precision.
2201 */
2202 static inline bool
radv_image_tile_stencil_disabled(const struct radv_device * device,const struct radv_image * image)2203 radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
2204 {
2205 if (device->physical_device->rad_info.chip_class >= GFX9) {
2206 return !vk_format_has_stencil(image->vk_format) && !radv_image_has_vrs_htile(device, image);
2207 } else {
2208 /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
2209 * the TC-compat ZRANGE issue even if no stencil is used.
2210 */
2211 return !vk_format_has_stencil(image->vk_format) && !radv_image_is_tc_compat_htile(image);
2212 }
2213 }
2214
2215 static inline bool
radv_image_has_clear_value(const struct radv_image * image)2216 radv_image_has_clear_value(const struct radv_image *image)
2217 {
2218 return image->clear_value_offset != 0;
2219 }
2220
2221 static inline uint64_t
radv_image_get_fast_clear_va(const struct radv_image * image,uint32_t base_level)2222 radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
2223 {
2224 assert(radv_image_has_clear_value(image));
2225
2226 uint64_t va = radv_buffer_get_va(image->bo);
2227 va += image->offset + image->clear_value_offset + base_level * 8;
2228 return va;
2229 }
2230
2231 static inline uint64_t
radv_image_get_fce_pred_va(const struct radv_image * image,uint32_t base_level)2232 radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
2233 {
2234 assert(image->fce_pred_offset != 0);
2235
2236 uint64_t va = radv_buffer_get_va(image->bo);
2237 va += image->offset + image->fce_pred_offset + base_level * 8;
2238 return va;
2239 }
2240
2241 static inline uint64_t
radv_image_get_dcc_pred_va(const struct radv_image * image,uint32_t base_level)2242 radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
2243 {
2244 assert(image->dcc_pred_offset != 0);
2245
2246 uint64_t va = radv_buffer_get_va(image->bo);
2247 va += image->offset + image->dcc_pred_offset + base_level * 8;
2248 return va;
2249 }
2250
2251 static inline uint64_t
radv_get_tc_compat_zrange_va(const struct radv_image * image,uint32_t base_level)2252 radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
2253 {
2254 assert(image->tc_compat_zrange_offset != 0);
2255
2256 uint64_t va = radv_buffer_get_va(image->bo);
2257 va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
2258 return va;
2259 }
2260
2261 static inline uint64_t
radv_get_ds_clear_value_va(const struct radv_image * image,uint32_t base_level)2262 radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
2263 {
2264 assert(radv_image_has_clear_value(image));
2265
2266 uint64_t va = radv_buffer_get_va(image->bo);
2267 va += image->offset + image->clear_value_offset + base_level * 8;
2268 return va;
2269 }
2270
2271 static inline uint32_t
radv_get_htile_initial_value(const struct radv_device * device,const struct radv_image * image)2272 radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
2273 {
2274 uint32_t initial_value;
2275
2276 if (radv_image_tile_stencil_disabled(device, image)) {
2277 /* Z only (no stencil):
2278 *
2279 * |31 18|17 4|3 0|
2280 * +---------+---------+-------+
2281 * | Max Z | Min Z | ZMask |
2282 */
2283 initial_value = 0xfffc000f;
2284 } else {
2285 /* Z and stencil:
2286 *
2287 * |31 12|11 10|9 8|7 6|5 4|3 0|
2288 * +-----------+-----+------+-----+-----+-------+
2289 * | Z Range | | SMem | SR1 | SR0 | ZMask |
2290 *
2291 * SR0/SR1 contains the stencil test results. Initializing
2292 * SR0/SR1 to 0x3 means the stencil test result is unknown.
2293 *
2294 * Z, stencil and 4 bit VRS encoding:
2295 * |31 12|11 10|9 8|7 6|5 4|3 0|
2296 * +-----------+------------+------+------------+-----+-------+
2297 * | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
2298 */
2299 if (radv_image_has_vrs_htile(device, image)) {
2300 /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
2301 initial_value = 0xfffff33f;
2302 } else {
2303 initial_value = 0xfffff3ff;
2304 }
2305 }
2306
2307 return initial_value;
2308 }
2309
2310 static inline bool
radv_image_get_iterate256(struct radv_device * device,struct radv_image * image)2311 radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
2312 {
2313 /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
2314 return device->physical_device->rad_info.chip_class >= GFX10 &&
2315 (image->usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
2316 VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
2317 radv_image_is_tc_compat_htile(image) &&
2318 image->info.samples > 1;
2319 }
2320
2321 unsigned radv_image_queue_family_mask(const struct radv_image *image,
2322 enum radv_queue_family family,
2323 enum radv_queue_family queue_family);
2324
2325 static inline uint32_t
radv_get_layerCount(const struct radv_image * image,const VkImageSubresourceRange * range)2326 radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2327 {
2328 return range->layerCount == VK_REMAINING_ARRAY_LAYERS
2329 ? image->info.array_size - range->baseArrayLayer
2330 : range->layerCount;
2331 }
2332
2333 static inline uint32_t
radv_get_levelCount(const struct radv_image * image,const VkImageSubresourceRange * range)2334 radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
2335 {
2336 return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
2337 : range->levelCount;
2338 }
2339
2340 bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
2341
2342 struct radeon_bo_metadata;
2343 void radv_init_metadata(struct radv_device *device, struct radv_image *image,
2344 struct radeon_bo_metadata *metadata);
2345
2346 void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
2347 uint64_t offset, uint32_t stride);
2348
/* Hardware image descriptor storage: viewed either as plane0 + FMASK
 * descriptors, or as up to 3 per-plane descriptors (multi-planar formats).
 * Both views alias the same 3*8 dwords. */
union radv_descriptor {
   struct {
      uint32_t plane0_descriptor[8];
      uint32_t fmask_descriptor[8];
   };
   struct {
      uint32_t plane_descriptors[3][8];
   };
};
2358
/* Driver representation of a VkImageView. */
struct radv_image_view {
   struct vk_object_base base;
   struct radv_image *image; /**< VkImageViewCreateInfo::image */

   VkImageViewType type;
   VkImageAspectFlags aspect_mask;
   VkFormat vk_format;
   unsigned plane_id;    /* Plane of the image this view selects. */
   uint32_t base_layer;  /* First array layer. */
   uint32_t layer_count; /* Resolved layer count (no VK_REMAINING_*). */
   uint32_t base_mip;    /* First mip level. */
   uint32_t level_count; /* Resolved level count (no VK_REMAINING_*). */
   VkExtent3D extent;    /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

   /* Whether the image iview supports fast clear. */
   bool support_fast_clear;

   union radv_descriptor descriptor;

   /* Descriptor for use as a storage image as opposed to a sampled image.
    * This has a few differences for cube maps (e.g. type).
    */
   union radv_descriptor storage_descriptor;
};
2383
/* Driver-internal parameters wrapping VkImageCreateInfo for
 * radv_image_create(). */
struct radv_image_create_info {
   const VkImageCreateInfo *vk_info;
   bool scanout;            /* Image will be scanned out by display hw. */
   bool no_metadata_planes; /* Skip allocating metadata (CMASK/DCC/HTILE) planes. */
   bool prime_blit_src;
   const struct radeon_bo_metadata *bo_metadata; /* Optional imported tiling metadata. */
};
2391
2392 VkResult
2393 radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
2394 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
2395 struct radv_image *image);
2396
2397 VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
2398 const VkAllocationCallbacks *alloc, VkImage *pImage);
2399
2400 bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
2401 VkFormat format, VkImageCreateFlags flags,
2402 bool *sign_reinterpret);
2403
2404 bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
2405
2406 VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
2407 const VkNativeBufferANDROID *gralloc_info,
2408 const VkAllocationCallbacks *alloc, VkImage *out_image_h);
2409 uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
2410 const VkImageUsageFlags vk_usage);
2411 VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2412 unsigned priority,
2413 const VkImportAndroidHardwareBufferInfoANDROID *info);
2414 VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
2415 unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
2416
2417 VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
2418
2419 bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
2420
/* Driver-internal overrides for radv_image_view_init(); the two flags are
 * mutually exclusive compression overrides for the view. */
struct radv_image_view_extra_create_info {
   bool disable_compression;
   bool enable_compression;
};
2425
2426 void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
2427 const VkImageViewCreateInfo *pCreateInfo,
2428 const struct radv_image_view_extra_create_info *extra_create_info);
2429 void radv_image_view_finish(struct radv_image_view *iview);
2430
2431 VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
2432
/* Immutable YCbCr conversion parameters, captured from
 * VkSamplerYcbcrConversionCreateInfo. */
struct radv_sampler_ycbcr_conversion_state {
   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   VkChromaLocation chroma_offsets[2]; /* x/y chroma sample locations. */
   VkFilter chroma_filter;
};
2441
/* Driver representation of a VkSamplerYcbcrConversion. */
struct radv_sampler_ycbcr_conversion {
   struct vk_object_base base;
   /* The state is hashed for the descriptor set layout. */
   struct radv_sampler_ycbcr_conversion_state state;
};
2447
/* Driver representation of a VkBufferView. */
struct radv_buffer_view {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo; /* Backing buffer object. */
   VkFormat vk_format;
   uint64_t range;    /**< VkBufferViewCreateInfo::range */
   uint32_t state[4]; /* Hardware buffer descriptor (4 dwords). */
};
2455 void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2456 const VkBufferViewCreateInfo *pCreateInfo);
2457 void radv_buffer_view_finish(struct radv_buffer_view *view);
2458
2459 static inline struct VkExtent3D
radv_sanitize_image_extent(const VkImageType imageType,const struct VkExtent3D imageExtent)2460 radv_sanitize_image_extent(const VkImageType imageType, const struct VkExtent3D imageExtent)
2461 {
2462 switch (imageType) {
2463 case VK_IMAGE_TYPE_1D:
2464 return (VkExtent3D){imageExtent.width, 1, 1};
2465 case VK_IMAGE_TYPE_2D:
2466 return (VkExtent3D){imageExtent.width, imageExtent.height, 1};
2467 case VK_IMAGE_TYPE_3D:
2468 return imageExtent;
2469 default:
2470 unreachable("invalid image type");
2471 }
2472 }
2473
2474 static inline struct VkOffset3D
radv_sanitize_image_offset(const VkImageType imageType,const struct VkOffset3D imageOffset)2475 radv_sanitize_image_offset(const VkImageType imageType, const struct VkOffset3D imageOffset)
2476 {
2477 switch (imageType) {
2478 case VK_IMAGE_TYPE_1D:
2479 return (VkOffset3D){imageOffset.x, 0, 0};
2480 case VK_IMAGE_TYPE_2D:
2481 return (VkOffset3D){imageOffset.x, imageOffset.y, 0};
2482 case VK_IMAGE_TYPE_3D:
2483 return imageOffset;
2484 default:
2485 unreachable("invalid image type");
2486 }
2487 }
2488
2489 static inline bool
radv_image_extent_compare(const struct radv_image * image,const VkExtent3D * extent)2490 radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
2491 {
2492 if (extent->width != image->info.width || extent->height != image->info.height ||
2493 extent->depth != image->info.depth)
2494 return false;
2495 return true;
2496 }
2497
/* Driver representation of a VkSampler. */
struct radv_sampler {
   struct vk_object_base base;
   uint32_t state[4]; /* Hardware sampler descriptor (4 dwords). */
   struct radv_sampler_ycbcr_conversion *ycbcr_sampler; /* Optional YCbCr conversion. */
   uint32_t border_color_slot; /* Slot in the custom border color table. */
};
2504
/* Driver representation of a VkFramebuffer; attachment views are stored in
 * the trailing flexible array. */
struct radv_framebuffer {
   struct vk_object_base base;
   uint32_t width;
   uint32_t height;
   uint32_t layers;


   uint32_t attachment_count;
   struct radv_image_view *attachments[0];
};
2515
/* Implicit synchronization emitted between subpasses
 * (see radv_emit_subpass_barrier()). */
struct radv_subpass_barrier {
   VkPipelineStageFlags2KHR src_stage_mask;
   VkAccessFlags2KHR src_access_mask;
   VkAccessFlags2KHR dst_access_mask;
};
2521
2522 void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
2523 const struct radv_subpass_barrier *barrier);
2524
/* One attachment reference within a subpass. */
struct radv_subpass_attachment {
   uint32_t attachment; /* Index into the render pass attachments, or VK_ATTACHMENT_UNUSED. */
   VkImageLayout layout;
   VkImageLayout stencil_layout;
   bool in_render_loop; /* Attachment is both read and written in this subpass. */
};
2531
/* One subpass of a render pass. */
struct radv_subpass {
   uint32_t attachment_count;                 /* Total entries in 'attachments'. */
   struct radv_subpass_attachment *attachments;

   uint32_t input_count;
   uint32_t color_count;
   struct radv_subpass_attachment *input_attachments;
   struct radv_subpass_attachment *color_attachments;
   struct radv_subpass_attachment *resolve_attachments;
   struct radv_subpass_attachment *depth_stencil_attachment;
   struct radv_subpass_attachment *ds_resolve_attachment;
   struct radv_subpass_attachment *vrs_attachment;  /* Fragment shading rate attachment. */
   VkResolveModeFlagBits depth_resolve_mode;
   VkResolveModeFlagBits stencil_resolve_mode;

   /** Subpass has at least one color resolve attachment */
   bool has_color_resolve;

   struct radv_subpass_barrier start_barrier; /* Barrier emitted when the subpass begins. */

   uint32_t view_mask; /* Multiview view mask; 0 if multiview is disabled. */

   VkSampleCountFlagBits color_sample_count;
   VkSampleCountFlagBits depth_sample_count;
   VkSampleCountFlagBits max_sample_count;

   /* Whether the subpass has ingoing/outgoing external dependencies. */
   bool has_ingoing_dep;
   bool has_outgoing_dep;
};
2562
2563 uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
2564
/* One attachment description of a render pass. */
struct radv_render_pass_attachment {
   VkFormat format;
   uint32_t samples;
   VkAttachmentLoadOp load_op;
   VkAttachmentLoadOp stencil_load_op;
   VkImageLayout initial_layout;
   VkImageLayout final_layout;
   VkImageLayout stencil_initial_layout;
   VkImageLayout stencil_final_layout;

   /* The subpass id in which the attachment will be used first/last. */
   uint32_t first_subpass_idx;
   uint32_t last_subpass_idx;
};
2579
/* Driver representation of a VkRenderPass; subpasses are stored in the
 * trailing flexible array. */
struct radv_render_pass {
   struct vk_object_base base;
   uint32_t attachment_count;
   uint32_t subpass_count;
   struct radv_subpass_attachment *subpass_attachments; /* Backing storage for subpass refs. */
   struct radv_render_pass_attachment *attachments;
   struct radv_subpass_barrier end_barrier; /* Barrier emitted when the pass ends. */
   struct radv_subpass subpasses[0];
};
2589
2590 VkResult radv_device_init_meta(struct radv_device *device);
2591 void radv_device_finish_meta(struct radv_device *device);
2592
/* Driver representation of a VkQueryPool; results live in a CPU-mapped BO. */
struct radv_query_pool {
   struct vk_object_base base;
   struct radeon_winsys_bo *bo;  /* Backing buffer object for results. */
   uint32_t stride;              /* Bytes per query slot. */
   uint32_t availability_offset; /* Offset of per-query availability data in the BO. */
   uint64_t size;                /* Total BO size in bytes. */
   char *ptr;                    /* CPU mapping of the BO. */
   VkQueryType type;
   uint32_t pipeline_stats_mask; /* Enabled stats for PIPELINE_STATISTICS pools. */
};
2603
2604 bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
2605
2606 int radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
2607 const VkDeviceQueueCreateInfo *create_info,
2608 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority);
2609
2610 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
2611 struct radv_descriptor_set *set, unsigned idx);
2612
2613 void radv_cmd_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
2614 VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
2615 const VkWriteDescriptorSet *pDescriptorWrites,
2616 uint32_t descriptorCopyCount,
2617 const VkCopyDescriptorSet *pDescriptorCopies);
2618
2619 void radv_cmd_update_descriptor_set_with_template(struct radv_device *device,
2620 struct radv_cmd_buffer *cmd_buffer,
2621 struct radv_descriptor_set *set,
2622 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
2623 const void *pData);
2624
2625 void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
2626 VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
2627 uint32_t set, uint32_t descriptorWriteCount,
2628 const VkWriteDescriptorSet *pDescriptorWrites);
2629
2630 uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2631 const VkImageSubresourceRange *range, uint32_t value);
2632
2633 uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2634 const VkImageSubresourceRange *range);
2635
2636 /* radv_nir_to_llvm.c */
2637 struct radv_shader_args;
2638 struct radv_nir_compiler_options;
2639 struct radv_shader_info;
2640
2641 void llvm_compile_shader(const struct radv_nir_compiler_options *options,
2642 const struct radv_shader_info *info, unsigned shader_count,
2643 struct nir_shader *const *shaders, struct radv_shader_binary **binary,
2644 const struct radv_shader_args *args);
2645
2646 /* radv_shader_info.h */
2647 struct radv_shader_info;
2648
2649 void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
2650 const struct radv_pipeline_layout *layout,
2651 const struct radv_pipeline_key *pipeline_key,
2652 struct radv_shader_info *info);
2653
2654 void radv_nir_shader_info_init(struct radv_shader_info *info);
2655
2656 bool radv_thread_trace_init(struct radv_device *device);
2657 void radv_thread_trace_finish(struct radv_device *device);
2658 bool radv_begin_thread_trace(struct radv_queue *queue);
2659 bool radv_end_thread_trace(struct radv_queue *queue);
2660 bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
2661 void radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
2662 const void *data, uint32_t num_dwords);
2663 bool radv_is_instruction_timing_enabled(void);
2664
2665 bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2666 struct radv_buffer *buffer, const VkBufferImageCopy2KHR *region);
2667
2668 /* radv_sqtt_layer_.c */
/* radv_sqtt_layer_.c */
/* Layout-transition flags recorded in SQTT/RGP barrier markers; one bit per
 * kind of metadata operation the barrier performed. */
struct radv_barrier_data {
   union {
      struct {
         uint16_t depth_stencil_expand : 1;
         uint16_t htile_hiz_range_expand : 1;
         uint16_t depth_stencil_resummarize : 1;
         uint16_t dcc_decompress : 1;
         uint16_t fmask_decompress : 1;
         uint16_t fast_clear_eliminate : 1;
         uint16_t fmask_color_expand : 1;
         uint16_t init_mask_ram : 1;
         uint16_t reserved : 8;
      };
      uint16_t all; /* All flags as one value. */
   } layout_transitions;
};
2685
2686 /**
2687 * Value for the reason field of an RGP barrier start marker originating from
2688 * the Vulkan client (does not include PAL-defined values). (Table 15)
2689 */
/**
 * Value for the reason field of an RGP barrier start marker originating from
 * the Vulkan client (does not include PAL-defined values). (Table 15)
 */
enum rgp_barrier_reason {
   RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,

   /* External app-generated barrier reasons, i.e. API synchronization
    * commands Range of valid values: [0x00000001 ... 0x7FFFFFFF].
    */
   RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
   RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
   RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,

   /* Internal barrier reasons, i.e. implicit synchronization inserted by
    * the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
    */
   RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
   RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
   RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
   RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
   RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
2709
2710 void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
2711 void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
2712 void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
2713 void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
2714 void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
2715 VkImageAspectFlagBits aspects);
2716 void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
2717 void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
2718 void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
2719 void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
2720 enum rgp_barrier_reason reason);
2721 void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
2722 void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
2723 void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
2724 const struct radv_barrier_data *barrier);
2725
2726 uint64_t radv_get_current_time(void);
2727
/* Return the number of input vertices per primitive for a GS/mesh input
 * primitive type. */
static inline uint32_t
si_conv_gl_prim_to_vertices(enum shader_prim gl_prim)
{
   switch (gl_prim) {
   case SHADER_PRIM_POINTS:
      return 1;
   case SHADER_PRIM_LINES:
   case SHADER_PRIM_LINE_STRIP:
      return 2;
   case SHADER_PRIM_TRIANGLES:
   case SHADER_PRIM_TRIANGLE_STRIP:
      return 3;
   case SHADER_PRIM_LINES_ADJACENCY:
      return 4;
   case SHADER_PRIM_TRIANGLES_ADJACENCY:
      return 6;
   case SHADER_PRIM_QUADS:
      /* NOTE(review): returns the V_028A6C_TRISTRIP register value rather
       * than a literal vertex count, unlike every other case — presumably
       * intentional; confirm against callers before changing. */
      return V_028A6C_TRISTRIP;
   default:
      assert(0);
      return 0;
   }
}
2751
2752 static inline uint32_t
si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)2753 si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
2754 {
2755 switch (topology) {
2756 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2757 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
2758 return V_028A6C_POINTLIST;
2759 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2760 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2761 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2762 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2763 return V_028A6C_LINESTRIP;
2764 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2765 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2766 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2767 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2768 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2769 return V_028A6C_TRISTRIP;
2770 default:
2771 assert(0);
2772 return 0;
2773 }
2774 }
2775
/* Driver-internal extras for radv_cmd_buffer_begin_render_pass(). */
struct radv_extra_render_pass_begin_info {
   bool disable_dcc; /* Disable DCC for the pass's attachments. */
};
2779
2780 void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
2781 const VkRenderPassBeginInfo *pRenderPassBegin,
2782 const struct radv_extra_render_pass_begin_info *extra_info);
2783 void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer);
2784
2785 static inline uint32_t
si_translate_prim(unsigned topology)2786 si_translate_prim(unsigned topology)
2787 {
2788 switch (topology) {
2789 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
2790 return V_008958_DI_PT_POINTLIST;
2791 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
2792 return V_008958_DI_PT_LINELIST;
2793 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
2794 return V_008958_DI_PT_LINESTRIP;
2795 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
2796 return V_008958_DI_PT_TRILIST;
2797 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
2798 return V_008958_DI_PT_TRISTRIP;
2799 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
2800 return V_008958_DI_PT_TRIFAN;
2801 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
2802 return V_008958_DI_PT_LINELIST_ADJ;
2803 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
2804 return V_008958_DI_PT_LINESTRIP_ADJ;
2805 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
2806 return V_008958_DI_PT_TRILIST_ADJ;
2807 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
2808 return V_008958_DI_PT_TRISTRIP_ADJ;
2809 case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
2810 return V_008958_DI_PT_PATCH;
2811 default:
2812 assert(0);
2813 return 0;
2814 }
2815 }
2816
2817 static inline uint32_t
si_translate_stencil_op(enum VkStencilOp op)2818 si_translate_stencil_op(enum VkStencilOp op)
2819 {
2820 switch (op) {
2821 case VK_STENCIL_OP_KEEP:
2822 return V_02842C_STENCIL_KEEP;
2823 case VK_STENCIL_OP_ZERO:
2824 return V_02842C_STENCIL_ZERO;
2825 case VK_STENCIL_OP_REPLACE:
2826 return V_02842C_STENCIL_REPLACE_TEST;
2827 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
2828 return V_02842C_STENCIL_ADD_CLAMP;
2829 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
2830 return V_02842C_STENCIL_SUB_CLAMP;
2831 case VK_STENCIL_OP_INVERT:
2832 return V_02842C_STENCIL_INVERT;
2833 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
2834 return V_02842C_STENCIL_ADD_WRAP;
2835 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
2836 return V_02842C_STENCIL_SUB_WRAP;
2837 default:
2838 return 0;
2839 }
2840 }
2841
2842 static inline uint32_t
si_translate_blend_logic_op(VkLogicOp op)2843 si_translate_blend_logic_op(VkLogicOp op)
2844 {
2845 switch (op) {
2846 case VK_LOGIC_OP_CLEAR:
2847 return V_028808_ROP3_CLEAR;
2848 case VK_LOGIC_OP_AND:
2849 return V_028808_ROP3_AND;
2850 case VK_LOGIC_OP_AND_REVERSE:
2851 return V_028808_ROP3_AND_REVERSE;
2852 case VK_LOGIC_OP_COPY:
2853 return V_028808_ROP3_COPY;
2854 case VK_LOGIC_OP_AND_INVERTED:
2855 return V_028808_ROP3_AND_INVERTED;
2856 case VK_LOGIC_OP_NO_OP:
2857 return V_028808_ROP3_NO_OP;
2858 case VK_LOGIC_OP_XOR:
2859 return V_028808_ROP3_XOR;
2860 case VK_LOGIC_OP_OR:
2861 return V_028808_ROP3_OR;
2862 case VK_LOGIC_OP_NOR:
2863 return V_028808_ROP3_NOR;
2864 case VK_LOGIC_OP_EQUIVALENT:
2865 return V_028808_ROP3_EQUIVALENT;
2866 case VK_LOGIC_OP_INVERT:
2867 return V_028808_ROP3_INVERT;
2868 case VK_LOGIC_OP_OR_REVERSE:
2869 return V_028808_ROP3_OR_REVERSE;
2870 case VK_LOGIC_OP_COPY_INVERTED:
2871 return V_028808_ROP3_COPY_INVERTED;
2872 case VK_LOGIC_OP_OR_INVERTED:
2873 return V_028808_ROP3_OR_INVERTED;
2874 case VK_LOGIC_OP_NAND:
2875 return V_028808_ROP3_NAND;
2876 case VK_LOGIC_OP_SET:
2877 return V_028808_ROP3_SET;
2878 default:
2879 unreachable("Unhandled logic op");
2880 }
2881 }
2882
2883 /*
2884 * Queue helper to get ring.
2885 * placed here as it needs queue + device structs.
2886 */
2887 static inline enum ring_type
radv_queue_ring(struct radv_queue * queue)2888 radv_queue_ring(struct radv_queue *queue)
2889 {
2890 return radv_queue_family_to_ring(queue->device->physical_device, queue->qf);
2891 }
2892
2893 /**
2894 * Helper used for debugging compiler issues by enabling/disabling LLVM for a
2895 * specific shader stage (developers only).
2896 */
2897 static inline bool
radv_use_llvm_for_stage(struct radv_device * device,UNUSED gl_shader_stage stage)2898 radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
2899 {
2900 return device->physical_device->use_llvm;
2901 }
2902
/* A ray-tracing acceleration structure: a sized region inside a winsys
 * buffer object, addressed as bo + mem_offset (see
 * radv_accel_struct_get_va()).
 */
struct radv_acceleration_structure {
   struct vk_object_base base;

   struct radeon_winsys_bo *bo; /* backing buffer object */
   uint64_t mem_offset;         /* byte offset of the AS data inside bo */
   uint64_t size;               /* size of the AS in bytes */
};
2910
2911 static inline uint64_t
radv_accel_struct_get_va(const struct radv_acceleration_structure * accel)2912 radv_accel_struct_get_va(const struct radv_acceleration_structure *accel)
2913 {
2914 return radv_buffer_get_va(accel->bo) + accel->mem_offset;
2915 }
2916
/* radv_perfcounter.c — performance-counter packet emission helpers. */
void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
void radv_perfcounter_emit_reset(struct radeon_cmdbuf *cs);
void radv_perfcounter_emit_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
void radv_perfcounter_emit_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);

/* radv_spm.c — streaming performance monitor (SPM) setup/teardown.
 * radv_spm_init returns false on failure. */
bool radv_spm_init(struct radv_device *device);
void radv_spm_finish(struct radv_device *device);
void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs);
2927
/* Driver-local spelling for converting a Vulkan handle into the
 * corresponding radv_* struct pointer; forwards to the common vk_object
 * helper. */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
   VK_FROM_HANDLE(__radv_type, __name, __handle)

/* Generate the handle <-> struct cast helpers for every RADV object type.
 * Dispatchable handles (command buffer, device, instance, physical device,
 * queue) use VK_DEFINE_HANDLE_CASTS; everything else is non-dispatchable.
 * The second argument names the embedded vk_object_base member. */
VK_DEFINE_HANDLE_CASTS(radv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(radv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(radv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(radv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_acceleration_structure, base,
                               VkAccelerationStructureKHR,
                               VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, vk.base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, header.base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW);
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler_ycbcr_conversion, base,
                               VkSamplerYcbcrConversion,
                               VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
2979
2980 #ifdef __cplusplus
2981 }
2982 #endif
2983
2984 #endif /* RADV_PRIVATE_H */
2985