/*
 * Copyright © 2019 Raspberry Pi
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include "vk_device.h"
#include "vk_instance.h"
#include "vk_image.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"

#include "vk_command_buffer.h"
#include "vk_queue.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "common/v3d_tiling.h"
#include "common/v3d_util.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"

#include "v3dv_cl.h"

#include "wsi_common.h"

/* A non-fatal assert.  Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif
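
/* Hypothetical usage example: check an invariant in debug builds without
 * aborting the process:
 *
 *    v3dv_assert(job->draw_count > 0);
 */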

#define perf_debug(...) do {                       \
   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
      fprintf(stderr, __VA_ARGS__);                \
} while (0)
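
/* Illustrative use (hypothetical message): the warning is only emitted when
 * the "perf" flag is set in the V3D_DEBUG environment mask:
 *
 *    perf_debug("Falling back to a draw call for this clear\n");
 */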

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif
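
/* Note: having this as a compile-time constant instead of a bare #ifdef lets
 * callers write plain 'if (using_v3d_simulator)' checks while both branches
 * still get compiled.
 */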

struct v3d_simulator_file;

/* Minimum required by the Vulkan 1.1 spec (1 GiB) */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)

struct v3dv_physical_device {
   struct vk_physical_device vk;

   char *name;
   int32_t render_fd;
   int32_t display_fd;
   int32_t master_fd;

   /* We need these because it is not clear how to detect
    * valid devids in a portable way
    */
   bool has_primary;
   bool has_render;

   dev_t primary_devid;
   dev_t render_devid;

   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct disk_cache *disk_cache;

   mtx_t mutex;

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   struct {
      bool merge_jobs;
   } options;
};

VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
                                              struct v3dv_physical_device *pdevice,
                                              VkIcdSurfaceBase *surface);

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
                                                     uint32_t index);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);

bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
                           const VkOffset3D *offset,
                           VkFormat *compat_format);

struct v3dv_instance {
   struct vk_instance vk;

   int physicalDeviceCount;
   struct v3dv_physical_device physicalDevice;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
   /* FIXME: should this have a struct vk_object_base base? */
   struct list_head list_link;

   struct v3dv_device *device;

   /* List of wait threads spawned for any command buffers in a particular
    * call to vkQueueSubmit.
    */
   uint32_t wait_thread_count;
   struct {
      pthread_t thread;
      bool finished;
   } wait_threads[16];

   /* The master wait thread for the entire submit. This will wait for all
    * other threads in this submit to complete before processing signal
    * semaphores and fences.
    */
   pthread_t master_wait_thread;

   /* List of semaphores (and fence) to signal after all wait threads have
    * completed and all command buffer jobs in the submission have been sent
    * to the GPU.
    */
   uint32_t signal_semaphore_count;
   VkSemaphore *signal_semaphores;
   VkFence fence;
};

struct v3dv_queue {
   struct vk_queue vk;

   struct v3dv_device *device;

   /* A list of active v3dv_queue_submit_wait_info */
   struct list_head submit_wait_list;

   /* A mutex to prevent concurrent access to the list of wait threads */
   mtx_t mutex;

   struct v3dv_job *noop_job;
};

#define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
                                                    sizeof(VkComponentMapping))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_meta_texel_buffer_copy_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_key {
   bool robust_buffer_access;
   uint8_t topology;
   uint8_t logicop_func;
   bool msaa;
   bool sample_coverage;
   bool sample_alpha_to_coverage;
   bool sample_alpha_to_one;
   uint8_t cbufs;
   struct {
      enum pipe_format format;
      const uint8_t *swizzle;
   } color_fmt[V3D_MAX_DRAW_BUFFERS];
   uint8_t f32_color_rb;
   uint32_t va_swap_rb_mask;
   bool has_multiview;
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
};

/* Equivalent to gl_shader_stage, but including the coordinate shaders
 *
 * FIXME: perhaps move to common
 */
enum broadcom_shader_stage {
   BROADCOM_SHADER_VERTEX,
   BROADCOM_SHADER_VERTEX_BIN,
   BROADCOM_SHADER_GEOMETRY,
   BROADCOM_SHADER_GEOMETRY_BIN,
   BROADCOM_SHADER_FRAGMENT,
   BROADCOM_SHADER_COMPUTE,
};

#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)

/* Assumes that coordinate shaders will be custom-handled by the caller */
static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX;
   case MESA_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY;
   case MESA_SHADER_FRAGMENT:
      return BROADCOM_SHADER_FRAGMENT;
   case MESA_SHADER_COMPUTE:
      return BROADCOM_SHADER_COMPUTE;
   default:
      unreachable("Unknown gl shader stage");
   }
}

static inline gl_shader_stage
broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      return MESA_SHADER_VERTEX;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return MESA_SHADER_GEOMETRY;
   case BROADCOM_SHADER_FRAGMENT:
      return MESA_SHADER_FRAGMENT;
   case BROADCOM_SHADER_COMPUTE:
      return MESA_SHADER_COMPUTE;
   default:
      unreachable("Unknown broadcom shader stage");
   }
}

static inline bool
broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return true;
   default:
      return false;
   }
}

static inline bool
broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_GEOMETRY:
      return true;
   default:
      return false;
   }
}

static inline enum broadcom_shader_stage
broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX_BIN;
   case BROADCOM_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY_BIN;
   default:
      unreachable("Invalid shader stage");
   }
}
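
/* Illustrative example (not part of the driver): deriving the binning
 * counterpart of a render stage with the helpers above:
 *
 *    enum broadcom_shader_stage s =
 *       gl_shader_stage_to_broadcom(MESA_SHADER_VERTEX);
 *    assert(broadcom_shader_stage_is_render_with_binning(s));
 *    enum broadcom_shader_stage s_bin =
 *       broadcom_binning_shader_stage_for_render_stage(s);
 *    assert(s_bin == BROADCOM_SHADER_VERTEX_BIN);
 */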

static inline const char *
broadcom_shader_stage_name(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
      return "MESA_SHADER_VERTEX_BIN";
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return "MESA_SHADER_GEOMETRY_BIN";
   default:
      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
   }
}

struct v3dv_pipeline_cache {
   struct vk_object_base base;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *cache;
   struct v3dv_pipeline_cache_stats stats;

   /* For VK_EXT_pipeline_creation_cache_control. */
   bool externally_synchronized;
};

struct v3dv_device {
   struct vk_device vk;

   struct v3dv_instance *instance;
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* A sync object to track the last job submitted to the GPU. */
   uint32_t last_job_sync;

   /* A mutex to prevent concurrent access to last_job_sync from the queue */
   mtx_t mutex;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
      } texel_buffer_copy;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3dv_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3dv_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   struct v3dv_pipeline_cache default_pipeline_cache;

   /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
    * following covers the most common case, that is, all attribute formats
    * being float, allowing us to reuse the same BO for all pipelines matching
    * this requirement. Pipelines that need integer attributes will create
    * their own BO.
    */
   struct v3dv_bo *default_attribute_float;
   VkPhysicalDeviceFeatures features;
};

struct v3dv_device_memory {
   struct vk_object_base base;

   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool has_bo_ownership;
   bool is_for_wsi;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO     255

struct v3dv_format {
   bool supported;

   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or V3D_OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   /* Size of a single pane of the slice.  For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

struct v3dv_image {
   struct vk_image vk;

   const struct v3dv_format *format;
   uint32_t cpp;
   bool tiled;

   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
   uint32_t alignment;
};

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

/* Pre-generating packets needs to consider changes in packet sizes across hw
 * versions. Keep things simple and allocate enough space for any supported
 * version. We ensure the size is large enough through static asserts.
 */
#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
#define V3DV_SAMPLER_STATE_LENGTH 24
#define V3DV_BLEND_CFG_LENGTH 5
#define V3DV_CFG_BITS_LENGTH 4
#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
#define V3DV_VCM_CACHE_SIZE_LENGTH 2
#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
#define V3DV_STENCIL_CFG_LENGTH 6
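
/* A sketch of what such a static assert looks like (an assumption for
 * illustration; the actual checks live in the per-version v3dvx code, where
 * the generated packet sizes are available):
 *
 *    STATIC_ASSERT(V3DV_SAMPLER_STATE_LENGTH >=
 *                  cl_packet_length(SAMPLER_STATE));
 */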

struct v3dv_image_view {
   struct vk_image_view vk;

   const struct v3dv_format *format;
   bool swap_rb;
   uint32_t internal_bpp;
   uint32_t internal_type;
   uint32_t offset;

   /* Precomputed swizzles (composed from createinfo->components and the
    * format swizzle) to pass in to the shader key.
    *
    * This could also be included in the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy.
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);

struct v3dv_buffer {
   struct vk_object_base base;

   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

struct v3dv_buffer_view {
   struct vk_object_base base;

   struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;

   /* Whether we need to emit the clear of the depth/stencil attachment using
    * a draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;

   /* Multiview */
   uint32_t view_mask;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription desc;

   uint32_t first_subpass;
   uint32_t last_subpass;

   /* When multiview is enabled, we no longer care about when a particular
    * attachment is first or last used in a render pass, since not all views
    * in the attachment will meet that criteria. Instead, we need to track
    * each individual view (layer) in each attachment and emit our stores,
    * loads and clears accordingly.
    */
   struct {
      uint32_t first_subpass;
      uint32_t last_subpass;
   } views[MAX_MULTIVIEW_VIEW_COUNT];

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we can use the TLB resolve on store.
    */
   bool use_tlb_resolve;
};

struct v3dv_render_pass {
   struct vk_object_base base;

   bool multiview_enabled;

   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   bool     msaa;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *framebuffer,
                                                const struct v3dv_subpass *subpass,
                                                uint8_t *max_bpp, bool *msaa);

bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                       const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);

struct v3dv_cmd_pool {
   struct vk_object_base base;

   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
};

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW           = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;
};

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic.
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT                  = 1 << 0,
   V3DV_DYNAMIC_SCISSOR                   = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS                = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH                = 1 << 7,
   V3DV_DYNAMIC_COLOR_WRITE_ENABLE        = 1 << 8,
   V3DV_DYNAMIC_ALL                       = (1 << 9) - 1,
};

/* Flags for dirty pipeline state.
 */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT                  = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR                   = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
   V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 6,
   V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 7,
   V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 8,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 9,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 10,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 11,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 12,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 13,
   V3DV_CMD_DIRTY_DEPTH_BIAS                = 1 << 14,
   V3DV_CMD_DIRTY_LINE_WIDTH                = 1 << 15,
   V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 16,
   V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE        = 1 << 17,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float depth_bias_clamp;
      float slope_factor;
   } depth_bias;

   float line_width;

   uint32_t color_write_enable;
};

extern const struct v3dv_dynamic_state default_dynamic_state;

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   V3D_EZ_UNDECIDED = 0,
   V3D_EZ_GT_GE,
   V3D_EZ_LT_LE,
   V3D_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_SET_EVENT,
   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_event_set_cpu_job_info {
   struct v3dv_event *event;
   int state;
};

struct v3dv_event_wait_cpu_job_info {
   /* List of events to wait on */
   uint32_t event_count;
   struct v3dv_event **events;

   /* Whether any postponed jobs after the wait should wait on semaphores */
   bool sem_wait;
};

struct v3dv_copy_buffer_to_image_cpu_job_info {
   struct v3dv_image *image;
   struct v3dv_buffer *buffer;
   uint32_t buffer_offset;
   uint32_t buffer_stride;
   uint32_t buffer_layer_stride;
   VkOffset3D image_offset;
   VkExtent3D image_extent;
   uint32_t mip_level;
   uint32_t base_layer;
   uint32_t layer_count;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;
   uint64_t bo_handle_mask;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* If we have already decided if we need to disable Early Z/S completely
    * for this job.
    */
   bool decided_global_ez_enable;

   /* If this job has been configured to use early Z/S clear */
   bool early_zs_clear;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* Whether we need to serialize this job in our command stream */
   bool serialize;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info          query_reset;
      struct v3dv_end_query_cpu_job_info            query_end;
      struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
      struct v3dv_event_set_cpu_job_info            event_set;
      struct v3dv_event_wait_cpu_job_info           event_wait;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      uint32_t wg_base[3];
      struct drm_v3d_submit_csd submit;
   } csd;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);

void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);

void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          bool allocate_tile_state_for_all_layers,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          bool msaa);

struct v3dv_job *
v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                             struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void
v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t slot_size,
                                   uint32_t used_count,
                                   uint32_t *alloc_count,
                                   void **ptr);

void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);

/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
 * cmd_buffer specific header?
 */
struct v3dv_draw_info {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_pipeline_state {
   struct v3dv_pipeline *pipeline;

   struct v3dv_descriptor_state descriptor_state;
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_cmd_pipeline_state gfx;
   struct v3dv_cmd_pipeline_state compute;

   struct v3dv_dynamic_state dynamic;

   uint32_t dirty;
   VkShaderStageFlagBits dirty_descriptor_stages;
   VkShaderStageFlagBits dirty_push_constants_stages;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Current uniforms */
   struct {
      struct v3dv_cl_reloc vs_bin;
      struct v3dv_cl_reloc vs;
      struct v3dv_cl_reloc gs_bin;
      struct v3dv_cl_reloc gs;
      struct v3dv_cl_reloc fs;
   } uniforms;

   /* Current view index for multiview rendering */
   uint32_t view_index;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* Whether we have recorded a pipeline barrier that we still need to
    * process.
    */
   bool has_barrier;
   bool has_bcl_barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_cmd_pipeline_state gfx;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdEndQuery commands recorded in the command buffer during
       * a render pass. We queue these here and then schedule the corresponding
       * CPU jobs for them at the time we finish the GPU job in which they have
       * been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_cpu_job_info *states;
      } end;

      /* This BO is not NULL if we have an active query, that is, we have
       * called vkCmdBeginQuery but not vkCmdEndQuery.
       */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      } active_query;
   } query;
};

/* The following struct represents the info from a descriptor that we store in
 * host memory. It mostly holds links to other existing vulkan objects, like
 * the image_view in order to access the swizzle info, or the buffer used for
 * a UBO/SSBO, for example.
 *
 * FIXME: revisit if it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         uint32_t offset;
         uint32_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

struct v3dv_query {
   bool maybe_available;
   union {
      /* Used by GPU queries (occlusion) */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      };
      /* Used by CPU queries (timestamp) */
      uint64_t value;
   };
};

struct v3dv_query_pool {
   struct vk_object_base base;

   struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */

   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
                                                       uint64_t pobj,
                                                       VkAllocationCallbacks *alloc);
struct v3dv_cmd_buffer_private_obj {
   struct list_head list_link;
   uint64_t obj;
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};

struct v3dv_cmd_buffer {
   struct vk_command_buffer vk;

   struct v3dv_device *device;

   struct v3dv_cmd_pool *pool;
   struct list_head pool_link;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   /* FIXME: we have just one client-side copy and one bo for the push
    * constants, independently of the stageFlags in vkCmdPushConstants, and
    * the pipelineBindPoint in vkCmdBindPipeline. We could probably do more
    * stage tuning in the future if it makes sense.
    */
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_cl_reloc push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   struct {
      struct {
         /* The current descriptor pool for blit sources */
         VkDescriptorPool dspool;
      } blit;
      struct {
         /* The current descriptor pool for texel buffer copy sources */
         VkDescriptorPool dspool;
      } texel_buffer_copy;
   } meta;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
};

struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                                           int32_t subpass_idx,
                                           enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                                               uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);

void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t dirty_dynamic_state,
                                    bool needs_subpass_resume);

void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_query_pool *pool,
                                   uint32_t first,
                                   uint32_t count);

void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 VkQueryControlFlags flags);

void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_query_pool *pool,
                               uint32_t query);

void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                        struct v3dv_query_pool *pool,
                                        uint32_t first,
                                        uint32_t count,
                                        struct v3dv_buffer *dst,
                                        uint32_t offset,
                                        uint32_t stride,
                                        VkQueryResultFlags flags);

void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct drm_v3d_submit_tfu *tfu);

void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
                                              const uint32_t *wg_counts);

void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint64_t obj,
                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);

struct v3dv_semaphore {
   struct vk_object_base base;

   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
   uint32_t temp_sync;
};

struct v3dv_fence {
   struct vk_object_base base;

   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportFenceFd. */
   uint32_t temp_sync;
};

struct v3dv_event {
   struct vk_object_base base;
   int state;
};

struct v3dv_shader_variant {
   enum broadcom_shader_stage stage;

   union {
      struct v3d_prog_data *base;
      struct v3d_vs_prog_data *vs;
      struct v3d_gs_prog_data *gs;
      struct v3d_fs_prog_data *fs;
      struct v3d_compute_prog_data *cs;
   } prog_data;

   /* We explicitly save the prog_data_size as it makes serialization
    * easier.
    */
   uint32_t prog_data_size;

   /* The assembly for this variant will be uploaded to a BO shared with all
    * other shader stages in that pipeline. This is the offset in that BO.
    */
   uint32_t assembly_offset;

   /* Note: qpu_insts is very likely to be NULL, as it is only used
    * temporarily to upload the assembly to the shared bo, since we compile
    * the different stages individually.
    */
   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;
};

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't need so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if this kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;

   VkPipelineCreationFeedbackEXT feedback;
};

/* We are using the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free them if needed
 * * Track the suballocated pool bo regions, so if some descriptor set is
 *   freed, the gap could be reallocated later.
 *
 * These only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};

struct v3dv_descriptor_pool {
   struct vk_object_base base;

   /* If this descriptor pool has been allocated by the driver for internal
    * use, typically to implement meta operations.
    */
   bool is_driver_internal;

   struct v3dv_bo *bo;
   /* Current offset at the descriptor bo. 0 means that we didn't use it for
    * any descriptor. If the descriptor bo is NULL, the current offset is
    * meaningless.
    */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set, the
    * descriptor sets are handled as a whole as pool memory, managed by the
    * following pointers. If it is set, these are not used and descriptor
    * sets are allocated/freed individually.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct v3dv_descriptor_pool_entry entries[0];
};

struct v3dv_descriptor_set {
   struct vk_object_base base;

   struct v3dv_descriptor_pool *pool;

   const struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool bo for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    */
   struct v3dv_descriptor descriptors[0];
};

struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset on the descriptor bo needs to take into account
    * set->base_offset)
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};

struct v3dv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;
};

/*
 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
 * it to be big enough to include the max value for all of them.
 *
 * FIXME: one alternative would be to allocate each map as big as needed for
 * its descriptor type. That would mean more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
                                 MAX_UNIFORM_BUFFERS,      \
                                 MAX_STORAGE_BUFFERS)


struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only for samplers, but this is the easiest place
    * to put it.
    */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
};

struct v3dv_sampler {
   struct vk_object_base base;

   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
    * configuration. If needed, it will be copied to the descriptor info
    * during UpdateDescriptorSets.
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
};

struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements into the user provided data */
   size_t stride;
};

struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct v3dv_descriptor_template_entry entries[0];
};

1676 
1677 /* We keep two special values for the sampler idx that represents exactly when a
1678  * sampler is not needed/provided. The main use is that even if we don't have
1679  * sampler, we still need to do the output unpacking (through
1680  * nir_lower_tex). The easier way to do this is to add those special "no
1681  * sampler" in the sampler_map, and then use the proper unpacking for that
1682  * case.
1683  *
1684  * We have one when we want a 16bit output size, and other when we want a
1685  * 32bit output size. We use the info coming from the RelaxedPrecision
1686  * decoration to decide between one and the other.
1687  */
1688 #define V3DV_NO_SAMPLER_16BIT_IDX 0
1689 #define V3DV_NO_SAMPLER_32BIT_IDX 1
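
/* Illustrative sketch of how one of the two special indices would be picked
 * (hypothetical helper; the driver derives the choice from the
 * RelaxedPrecision decoration as described above):
 *
 *    static uint32_t
 *    no_sampler_index(bool relaxed_precision)
 *    {
 *       return relaxed_precision ? V3DV_NO_SAMPLER_16BIT_IDX
 *                                : V3DV_NO_SAMPLER_32BIT_IDX;
 *    }
 */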

/*
 * The following two methods are used to pack and unpack the combined
 * texture/sampler indices stored in the maps at v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
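
/* Example round trip (illustrative only). The key keeps the texture index in
 * the top 8 bits and the sampler index in the low 24 bits, so texture_index
 * must fit in 8 bits:
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(2, 1);
 *    uint32_t tex, samp;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex, &samp);
 *    assert(tex == 2 && samp == 1);
 */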

struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* This structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: we can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. These stages will be freed once the pipeline
    * has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Indexed by binding, so vb[binding].stride is the stride of the vertex
    * array with that binding.
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of the info from VkVertexInputAttributeDescription is
    * already prepacked, so here we only store the fields that we still need
    * to check later. The array must be indexed by driver location, since
    * that is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But since in most cases it will be NULL, it
    * is not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* Whether the pipeline should emit any of the stencil configuration
    * packets.
    */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   /* Packets prepacked during pipeline creation. */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state*
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}

const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

#ifdef DEBUG
#define v3dv_debug_ignored_stype(sType) \
   fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
#else
#define v3dv_debug_ignored_stype(sType)
#endif

const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
const struct v3dv_format *
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
                               uint32_t bpp, VkFormat *out_vk_format);
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
                                          VkFormat vk_format,
                                          VkFormatFeatureFlags features);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline *pipeline,
                                         struct v3dv_shader_variant *variant);

struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline *pipeline,
                                                    struct v3dv_shader_variant *variant,
                                                    uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   p_atomic_inc(&shared_data->ref_cnt);
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data);

static inline void
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
                                struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shared_data->ref_cnt))
      v3dv_pipeline_shared_data_destroy(device, shared_data);
}
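
/* Typical ownership pattern for the shared data (illustrative sketch): each
 * object that keeps a pointer takes a reference and drops it on destruction,
 * and the last unref frees the data:
 *
 *    pipeline->shared_data = shared_data;
 *    v3dv_pipeline_shared_data_ref(shared_data);
 *    ...
 *    v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);
 */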

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

const struct v3dv_format*
v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_state,
                                       struct v3dv_descriptor_map *map,
                                       struct v3dv_pipeline_layout *pipeline_layout,
                                       uint32_t index,
                                       VkFormat *out_vk_format);

struct v3dv_bo*
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}
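
/* The immutable samplers live in memory trailing the set layout itself, at
 * the offset recorded in the binding layout. An allocation along these lines
 * (illustrative sketch, assuming vk_object_zalloc from vk_object.h) would
 * produce that layout:
 *
 *    size_t size = sizeof(struct v3dv_descriptor_set_layout) +
 *                  immutable_sampler_count * sizeof(struct v3dv_sampler);
 *    struct v3dv_descriptor_set_layout *layout =
 *       vk_object_zalloc(&device->vk, pAllocator, size,
 *                        VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
 */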

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              VkPipelineCacheCreateFlags,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

void v3dv_shader_module_internal_init(struct v3dv_device *device,
                                      struct vk_shader_module *module,
                                      nir_shader *nir);

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle)			\
   VK_FROM_HANDLE(__v3dv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
                               VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
                               VK_OBJECT_TYPE_SEMAPHORE)

static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}
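
/* Illustrative sketch of using the wrapper with one of the v3d ioctls from
 * drm-uapi/v3d_drm.h (error handling elided):
 *
 *    struct drm_v3d_create_bo create = { .size = 4096 };
 *    int ret = v3dv_ioctl(fd, DRM_IOCTL_V3D_CREATE_BO, &create);
 *    // on success, create.handle and create.offset describe the new BO
 */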

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
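
/* These two are suitable as callbacks for a util/hash_table.h table keyed on
 * 64-bit values, e.g. (illustrative sketch):
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 */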

/* Helper to call hardware-version-specific functions */
#define v3dv_X(device, thing) ({                      \
   __typeof(&v3d42_##thing) v3d_X_thing;              \
   switch (device->devinfo.ver) {                     \
   case 42:                                           \
      v3d_X_thing = &v3d42_##thing;                   \
      break;                                          \
   default:                                           \
      unreachable("Unsupported hardware generation"); \
   }                                                  \
   v3d_X_thing;                                       \
})
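
/* Illustrative use of the dispatch macro (the function name here is just an
 * example): a call such as
 *
 *    v3dv_X(device, job_emit_binning_flush)(job);
 *
 * resolves to v3d42_job_emit_binning_flush(job) on a ver 42 device.
 */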


/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
 * define v3dX for each version supported, because when we compile code that
 * is not version-specific, all version-specific macros need to be already
 * defined.
 */
#ifdef v3dX
#  include "v3dvx_private.h"
#else
#  define v3dX(x) v3d42_##x
#  include "v3dvx_private.h"
#  undef v3dX
#endif

#endif /* V3DV_PRIVATE_H */