/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include "vk_device.h"
#include "vk_format.h"
#include "vk_instance.h"
#include "vk_image.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"

#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_queue.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "common/v3d_tiling.h"
#include "common/v3d_util.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/sparse_array.h"
#include "util/xmlconfig.h"
#include "u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"

#include "v3dv_cl.h"

#include "wsi_common.h"

/* A non-fatal assert.  Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif

#define perf_debug(...) do {                       \
   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))       \
      fprintf(stderr, __VA_ARGS__);                \
} while (0)

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif

struct v3d_simulator_file;

/* Minimum required by the Vulkan 1.1 spec */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)

struct v3dv_physical_device {
   struct vk_physical_device vk;

   char *name;
   int32_t render_fd;
   int32_t display_fd;
   int32_t master_fd;

   /* We need these because it is not clear how to detect
    * valid devids in a portable way
    */
   bool has_primary;
   bool has_render;

   dev_t primary_devid;
   dev_t render_devid;

   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct disk_cache *disk_cache;

   mtx_t mutex;

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   /* This array holds all our 'struct v3dv_bo' allocations. We use this
    * so we can add a refcount to our BOs and check if a particular BO
    * was already allocated in this device using its GEM handle. This is
    * necessary to properly manage BO imports, because the kernel doesn't
    * refcount the underlying BO memory.
    *
    * Specifically, when self-importing (i.e. importing a BO into the same
    * device that created it), the kernel will give us the same BO handle
    * for both BOs and we must only free it once, when both references are
    * freed. Otherwise, if we are not self-importing, we get two different BO
    * handles, and we want to free each one individually.
    *
    * The BOs in this map all track their references in a refcnt field, and
    * only self-imported BOs will ever have a refcnt > 1.
    */
   struct util_sparse_array bo_map;

   struct {
      bool merge_jobs;
   } options;

   struct {
      bool multisync;
   } caps;
};

VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
                                              struct v3dv_physical_device *pdevice,
                                              VkIcdSurfaceBase *surface);

static inline struct v3dv_bo *
v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
{
   return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
}
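
/* A minimal sketch of how the refcounted bo_map described above can be used
 * on a BO import; error handling is omitted and the helper shown here is
 * hypothetical, not the driver's actual import path. Note that
 * util_sparse_array_get() returns zeroed memory for unused entries, which is
 * how a fresh GEM handle is detected:
 *
 *    struct v3dv_bo *bo = v3dv_device_lookup_bo(device, gem_handle);
 *    if (bo->refcnt == 0) {
 *       // First time this device sees this GEM handle: initialize the BO.
 *       init_imported_bo(bo, gem_handle, size);   // hypothetical helper
 *       p_atomic_set(&bo->refcnt, 1);
 *    } else {
 *       // Self-import: same GEM handle, just take another reference.
 *       p_atomic_inc(&bo->refcnt);
 *    }
 */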

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
                                                     uint32_t index);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);

bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
                           const VkOffset3D *offset,
                           VkFormat *compat_format);

struct v3dv_instance {
   struct vk_instance vk;

   int physicalDeviceCount;
   struct v3dv_physical_device physicalDevice;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
   /*  struct vk_object_base base; ?*/
   struct list_head list_link;

   struct v3dv_device *device;

   /* List of wait threads spawned for any command buffers in a particular
    * call to vkQueueSubmit.
    */
   uint32_t wait_thread_count;
   struct {
      pthread_t thread;
      bool finished;
   } wait_threads[16];

   /* The master wait thread for the entire submit. This will wait for all
    * other threads in this submit to complete before processing signal
    * semaphores and fences.
    */
   pthread_t master_wait_thread;

   /* List of semaphores (and fence) to signal after all wait threads have
    * completed and all command buffer jobs in the submission have been sent
    * to the GPU.
    */
   uint32_t signal_semaphore_count;
   VkSemaphore *signal_semaphores;
   VkFence fence;
};
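
/* As a sketch of the intent described above (the real logic lives in the
 * queue code; this is a simplification, not the actual implementation): the
 * master wait thread waits for every spawned wait thread to flag completion
 * and only then signals.
 *
 *    for (uint32_t i = 0; i < info->wait_thread_count; i++) {
 *       while (!info->wait_threads[i].finished)
 *          sched_yield();
 *    }
 *    // All command buffer jobs have been sent to the GPU by now, so it is
 *    // safe to process info->signal_semaphores and info->fence.
 */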

struct v3dv_queue {
   struct vk_queue vk;

   struct v3dv_device *device;

   /* A list of active v3dv_queue_submit_wait_info */
   struct list_head submit_wait_list;

   /* A mutex to prevent concurrent access to the list of wait threads */
   mtx_t mutex;

   /* A mutex to prevent concurrent noop job submissions */
   mtx_t noop_mutex;

   struct v3dv_job *noop_job;
};

#define V3DV_META_BLIT_CACHE_KEY_SIZE              (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
                                                    sizeof(VkComponentMapping))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_meta_texel_buffer_copy_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_key {
   bool robust_buffer_access;
   uint8_t topology;
   uint8_t logicop_func;
   bool msaa;
   bool sample_coverage;
   bool sample_alpha_to_coverage;
   bool sample_alpha_to_one;
   uint8_t cbufs;
   struct {
      enum pipe_format format;
      uint8_t swizzle[4];
   } color_fmt[V3D_MAX_DRAW_BUFFERS];
   uint8_t f32_color_rb;
   uint32_t va_swap_rb_mask;
   bool has_multiview;
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
};

/* Equivalent to gl_shader_stage, but including the coordinate shaders
 *
 * FIXME: perhaps move to common
 */
enum broadcom_shader_stage {
   BROADCOM_SHADER_VERTEX,
   BROADCOM_SHADER_VERTEX_BIN,
   BROADCOM_SHADER_GEOMETRY,
   BROADCOM_SHADER_GEOMETRY_BIN,
   BROADCOM_SHADER_FRAGMENT,
   BROADCOM_SHADER_COMPUTE,
};

#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)

/* Assumes that coordinate shaders will be custom-handled by the caller */
static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX;
   case MESA_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY;
   case MESA_SHADER_FRAGMENT:
      return BROADCOM_SHADER_FRAGMENT;
   case MESA_SHADER_COMPUTE:
      return BROADCOM_SHADER_COMPUTE;
   default:
      unreachable("Unknown gl shader stage");
   }
}

static inline gl_shader_stage
broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      return MESA_SHADER_VERTEX;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return MESA_SHADER_GEOMETRY;
   case BROADCOM_SHADER_FRAGMENT:
      return MESA_SHADER_FRAGMENT;
   case BROADCOM_SHADER_COMPUTE:
      return MESA_SHADER_COMPUTE;
   default:
      unreachable("Unknown broadcom shader stage");
   }
}

static inline bool
broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return true;
   default:
      return false;
   }
}

static inline bool
broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_GEOMETRY:
      return true;
   default:
      return false;
   }
}

static inline enum broadcom_shader_stage
broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX_BIN;
   case BROADCOM_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY_BIN;
   default:
      unreachable("Invalid shader stage");
   }
}

static inline const char *
broadcom_shader_stage_name(enum broadcom_shader_stage stage)
{
   switch(stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
      return "MESA_SHADER_VERTEX_BIN";
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return "MESA_SHADER_GEOMETRY_BIN";
   default:
      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
   }
}

struct v3dv_pipeline_cache {
   struct vk_object_base base;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *cache;
   struct v3dv_pipeline_cache_stats stats;

   /* For VK_EXT_pipeline_creation_cache_control. */
   bool externally_synchronized;
};

/* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
 * tfu), we still need a syncobj to track the last overall job submitted
 * (V3DV_QUEUE_ANY) for the case where we don't support multisync. Someday we
 * can start expecting multisync to be present and drop the legacy
 * implementation together with this V3DV_QUEUE_ANY tracker.
 */
enum v3dv_queue_type {
   V3DV_QUEUE_CL = 0,
   V3DV_QUEUE_CSD,
   V3DV_QUEUE_TFU,
   V3DV_QUEUE_ANY,
   V3DV_QUEUE_COUNT,
};

/* For each GPU queue, we use a syncobj to track the last job submitted. We
 * set the `first` flag to mark when we start a new cmd buffer batch, so we
 * know when a job submitted to a given queue will be the first in its cmd
 * buffer batch.
 */
struct v3dv_last_job_sync {
   /* If the job is the first submitted to a GPU queue in a cmd buffer batch */
   bool first[V3DV_QUEUE_COUNT];
   /* Array of syncobjs to track the last job submitted to a GPU queue */
   uint32_t syncs[V3DV_QUEUE_COUNT];
};
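
/* To illustrate how the syncobjs above are meant to be consumed (the helper
 * names here are hypothetical; the actual submit logic is more involved):
 *
 *    // Serialize against the last job submitted to every GPU queue...
 *    for (int i = 0; i < V3DV_QUEUE_COUNT; i++)
 *       add_syncobj_wait(device->last_job_syncs.syncs[i]);
 *
 *    // ...and make this CL job the new tail of its queue.
 *    add_syncobj_signal(device->last_job_syncs.syncs[V3DV_QUEUE_CL]);
 */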

struct v3dv_device {
   struct vk_device vk;

   struct v3dv_instance *instance;
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* Syncobjs to track the last job submitted to any GPU queue */
   struct v3dv_last_job_sync last_job_syncs;

   /* A mutex to prevent concurrent access to last_job_sync from the queue */
   mtx_t mutex;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
      } texel_buffer_copy;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3d_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3d_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   struct v3dv_pipeline_cache default_pipeline_cache;

   /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
    * following covers the most common case, that is, all attribute formats
    * being float, allowing us to reuse the same BO for all pipelines matching
    * this requirement. Pipelines that need integer attributes will create
    * their own BO.
    */
   struct v3dv_bo *default_attribute_float;
   VkPhysicalDeviceFeatures features;

#ifdef ANDROID
   const void *gralloc;
   enum {
      V3DV_GRALLOC_UNKNOWN,
      V3DV_GRALLOC_CROS,
      V3DV_GRALLOC_OTHER,
   } gralloc_type;
#endif
};

struct v3dv_device_memory {
   struct vk_object_base base;

   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool is_for_wsi;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO     255

struct v3dv_format {
   bool supported;

   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   /* Size of a single pane of the slice.  For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);

struct v3dv_image {
   struct vk_image vk;

   const struct v3dv_format *format;
   uint32_t cpp;
   bool tiled;

   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
   uint32_t alignment;

#ifdef ANDROID
   /* Image is backed by VK_ANDROID_native_buffer. */
   bool is_native_buffer_memory;
#endif
};

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

/* Pre-generating packets needs to consider changes in packet sizes across hw
 * versions. Keep things simple and allocate enough space for any supported
 * version. We ensure the size is large enough through static asserts.
 */
#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
#define V3DV_SAMPLER_STATE_LENGTH 24
#define V3DV_BLEND_CFG_LENGTH 5
#define V3DV_CFG_BITS_LENGTH 4
#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
#define V3DV_VCM_CACHE_SIZE_LENGTH 2
#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
#define V3DV_STENCIL_CFG_LENGTH 6
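
/* The static asserts mentioned above live in the per-version code, where the
 * generated packet sizes are visible. A sketch of the pattern, assuming the
 * cl_packet_length() helper from the generated packet headers:
 *
 *    static_assert(cl_packet_length(TEXTURE_SHADER_STATE) <=
 *                  V3DV_TEXTURE_SHADER_STATE_LENGTH,
 *                  "V3DV_TEXTURE_SHADER_STATE_LENGTH is too small");
 */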

struct v3dv_image_view {
   struct vk_image_view vk;

   const struct v3dv_format *format;
   bool swap_rb;
   bool channel_reverse;
   uint32_t internal_bpp;
   uint32_t internal_type;
   uint32_t offset;

   /* Precomputed swizzles (composed from createinfo->components and the
    * format swizzle) to pass in to the shader key.
    *
    * This could also be included in the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy.
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);

struct v3dv_buffer {
   struct vk_object_base base;

   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

struct v3dv_buffer_view {
   struct vk_object_base base;

   struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;
   struct v3dv_subpass_attachment ds_resolve_attachment;
   bool resolve_depth, resolve_stencil;

   /* If we need to emit the clear of the depth/stencil attachment using a
    * draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;

   /* Multiview */
   uint32_t view_mask;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription2 desc;

   uint32_t first_subpass;
   uint32_t last_subpass;

   /* When multiview is enabled, we no longer care about when a particular
    * attachment is first or last used in a render pass, since not all views
    * in the attachment will meet that criteria. Instead, we need to track
    * each individual view (layer) in each attachment and emit our stores,
    * loads and clears accordingly.
    */
   struct {
      uint32_t first_subpass;
      uint32_t last_subpass;
   } views[MAX_MULTIVIEW_VIEW_COUNT];

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we may be able to use the TLB hardware resolve based on the
    * attachment format.
    */
   bool try_tlb_resolve;
};

struct v3dv_render_pass {
   struct vk_object_base base;

   bool multiview_enabled;

   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;

   /* Notice that elements in 'attachments' will be NULL if the framebuffer
    * was created imageless. The driver is expected to access attachment info
    * from the command buffer state instead.
    */
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   bool     msaa;
   bool     double_buffer;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                       const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);

/* Checks if we need to emit 2 initial tile clears for double buffer mode.
 * This happens when we render at least 2 tiles, because in this mode each
 * tile uses a different half of the tile buffer memory so we can have 2 tiles
 * in flight (one being stored to memory and the next being rendered). In this
 * scenario, if we emit a single initial tile clear we would only clear the
 * first half of the tile buffer.
 */
static inline bool
v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
{
   return tiling->double_buffer &&
          (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
           tiling->layers > 1);
}

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW           = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED   = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING     = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE    = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;

   /* The underlying image view (from the framebuffer or, if an imageless
    * framebuffer is used, from VkRenderPassAttachmentBeginInfo).
    */
   struct v3dv_image_view *image_view;

   /* If this is a multisampled attachment with a resolve operation. */
   bool has_resolve;

   /* If this is a multisampled attachment with a resolve operation,
    * whether we can use the TLB for the resolve.
    */
   bool use_tlb_resolve;
};

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic.
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT                  = 1 << 0,
   V3DV_DYNAMIC_SCISSOR                   = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK      = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK        = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE         = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS           = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS                = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH                = 1 << 7,
   V3DV_DYNAMIC_COLOR_WRITE_ENABLE        = 1 << 8,
   V3DV_DYNAMIC_ALL                       = (1 << 9) - 1,
};

/* Flags for dirty pipeline state. */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT                  = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR                   = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK      = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK        = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE         = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
   V3DV_CMD_DIRTY_COMPUTE_PIPELINE          = 1 << 6,
   V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 7,
   V3DV_CMD_DIRTY_INDEX_BUFFER              = 1 << 8,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 9,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS   = 1 << 10,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 11,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS           = 1 << 12,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY           = 1 << 13,
   V3DV_CMD_DIRTY_DEPTH_BIAS                = 1 << 14,
   V3DV_CMD_DIRTY_LINE_WIDTH                = 1 << 15,
   V3DV_CMD_DIRTY_VIEW_INDEX                = 1 << 16,
   V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE        = 1 << 17,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float depth_bias_clamp;
      float slope_factor;
   } depth_bias;

   float line_width;

   uint32_t color_write_enable;
};

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   V3D_EZ_UNDECIDED = 0,
   V3D_EZ_GT_GE,
   V3D_EZ_LT_LE,
   V3D_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_SET_EVENT,
   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_submit_info_semaphores {
   /* List of semaphores to wait on before running a job */
   uint32_t wait_sem_count;
   VkSemaphore *wait_sems;

   /* List of semaphores to signal when all jobs complete */
   uint32_t signal_sem_count;
   VkSemaphore *signal_sems;

   /* A fence to signal when all jobs complete */
   VkFence fence;
};

struct v3dv_event_set_cpu_job_info {
   struct v3dv_event *event;
   int state;
};

struct v3dv_event_wait_cpu_job_info {
   /* List of events to wait on */
   uint32_t event_count;
   struct v3dv_event **events;
};

struct v3dv_copy_buffer_to_image_cpu_job_info {
   struct v3dv_image *image;
   struct v3dv_buffer *buffer;
   uint32_t buffer_offset;
   uint32_t buffer_stride;
   uint32_t buffer_layer_stride;
   VkOffset3D image_offset;
   VkExtent3D image_extent;
   uint32_t mip_level;
   uint32_t base_layer;
   uint32_t layer_count;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;
   uint64_t bo_handle_mask;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* Whether we have already decided if we need to disable Early Z/S
    * completely for this job.
    */
   bool decided_global_ez_enable;

   /* If this job has been configured to use early Z/S clear */
   bool early_zs_clear;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* Whether we need to serialize this job in our command stream */
   bool serialize;

   /* Whether this job is in charge of signalling semaphores */
   bool do_sem_signal;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info          query_reset;
      struct v3dv_end_query_cpu_job_info            query_end;
      struct v3dv_copy_query_results_cpu_job_info   query_copy_results;
      struct v3dv_event_set_cpu_job_info            event_set;
      struct v3dv_event_wait_cpu_job_info           event_wait;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info         csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info      query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      uint32_t wg_base[3];
      struct drm_v3d_submit_csd submit;
   } csd;
};

struct v3dv_wait_thread_info {
   struct v3dv_job *job;

   /* Semaphores info for any postponed jobs after a wait event */
   struct v3dv_submit_info_semaphores *sems_info;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);

void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);

void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          bool allocate_tile_state_for_all_layers,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          bool msaa);

bool v3dv_job_type_is_gpu(struct v3dv_job *job);

struct v3dv_job *
v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                             struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void
v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t slot_size,
                                   uint32_t used_count,
                                   uint32_t *alloc_count,
                                   void **ptr);

void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);

/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
 * cmd_buffer specific header?
 */
struct v3dv_draw_info {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_pipeline_state {
   struct v3dv_pipeline *pipeline;

   struct v3dv_descriptor_state descriptor_state;
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_cmd_pipeline_state gfx;
   struct v3dv_cmd_pipeline_state compute;

   struct v3dv_dynamic_state dynamic;

   uint32_t dirty;
   VkShaderStageFlagBits dirty_descriptor_stages;
   VkShaderStageFlagBits dirty_push_constants_stages;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Current uniforms */
   struct {
      struct v3dv_cl_reloc vs_bin;
      struct v3dv_cl_reloc vs;
      struct v3dv_cl_reloc gs_bin;
      struct v3dv_cl_reloc gs;
      struct v3dv_cl_reloc fs;
   } uniforms;

   /* Current view index for multiview rendering */
   uint32_t view_index;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* Whether we have recorded a pipeline barrier that we still need to
    * process.
    */
   bool has_barrier;
   bool has_bcl_barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_cmd_pipeline_state gfx;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdEndQuery commands recorded in the command buffer during
       * a render pass. We queue these here and then schedule the corresponding
       * CPU jobs for them at the time we finish the GPU job in which they have
       * been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_cpu_job_info *states;
      } end;

      /* This BO is not NULL if we have an active query, that is, we have
       * called vkCmdBeginQuery but not vkCmdEndQuery.
       */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      } active_query;
   } query;
};

/* The following struct represents the info from a descriptor that we store in
 * host memory. It is mostly links to other existing vulkan objects, like the
 * image_view in order to access swizzle info, or the buffer used for a
 * UBO/SSBO, for example.
 *
 * FIXME: revisit whether it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         size_t offset;
         size_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

struct v3dv_query {
   bool maybe_available;
   union {
      /* Used by GPU queries (occlusion) */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      };
      /* Used by CPU queries (timestamp) */
      uint64_t value;
   };
};

struct v3dv_query_pool {
   struct vk_object_base base;

   struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */

   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

void v3dv_reset_query_pools(struct v3dv_device *device,
                            struct v3dv_query_pool *query_pool,
                            uint32_t first,
                            uint32_t last);

typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
                                                       uint64_t pobj,
                                                       VkAllocationCallbacks *alloc);
struct v3dv_cmd_buffer_private_obj {
   struct list_head list_link;
   uint64_t obj;
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};

struct v3dv_cmd_buffer {
   struct vk_command_buffer vk;

   struct v3dv_device *device;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   /* FIXME: we have just one client-side buffer and BO for the push constants,
    * independently of the stageFlags in vkCmdPushConstants, and the
    * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
    * tuning in the future if it makes sense.
    */
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_cl_reloc push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   struct {
      struct {
         /* The current descriptor pool for blit sources */
         VkDescriptorPool dspool;
      } blit;
      struct {
         /* The current descriptor pool for texel buffer copy sources */
         VkDescriptorPool dspool;
      } texel_buffer_copy;
   } meta;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
};

struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                                           int32_t subpass_idx,
                                           enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                                               uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);

void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t dirty_dynamic_state,
                                    bool needs_subpass_resume);

void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_query_pool *pool,
                                   uint32_t first,
                                   uint32_t count);

void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 VkQueryControlFlags flags);

void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_query_pool *pool,
                               uint32_t query);

void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                        struct v3dv_query_pool *pool,
                                        uint32_t first,
                                        uint32_t count,
                                        struct v3dv_buffer *dst,
                                        uint32_t offset,
                                        uint32_t stride,
                                        VkQueryResultFlags flags);

void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct drm_v3d_submit_tfu *tfu);

void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
                                              const uint32_t *wg_counts);

void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint64_t obj,
                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);

struct v3dv_semaphore {
   struct vk_object_base base;

   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
   uint32_t temp_sync;
   bool has_temp;
};

struct v3dv_fence {
   struct vk_object_base base;

   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportFenceFd. */
   uint32_t temp_sync;
   bool has_temp;
};

struct v3dv_event {
   struct vk_object_base base;
   int state;
};

struct v3dv_shader_variant {
   enum broadcom_shader_stage stage;

   union {
      struct v3d_prog_data *base;
      struct v3d_vs_prog_data *vs;
      struct v3d_gs_prog_data *gs;
      struct v3d_fs_prog_data *fs;
      struct v3d_compute_prog_data *cs;
   } prog_data;

   /* We explicitly save the prog_data_size as it makes serialization
    * easier.
    */
   uint32_t prog_data_size;

   /* The assembly for this variant will be uploaded to a BO shared with all
    * other shader stages in that pipeline. This is the offset in that BO.
    */
   uint32_t assembly_offset;

   /* Note: it is really likely that qpu_insts will be NULL, as it is only
    * used temporarily, to upload the assembly to the shared bo, as we compile
    * the different stages individually.
    */
   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;
};

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't need so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if this kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;

   VkPipelineCreationFeedbackEXT feedback;
};

/* We are using the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free them if needed
 * * Track the suballocated pool bo regions, so if some descriptor set is
 *   freed, the gap can be reallocated later.
 *
 * These only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};
1613 
1614 struct v3dv_descriptor_pool {
1615    struct vk_object_base base;
1616 
1617    /* If this descriptor pool has been allocated for the driver for internal
1618     * use, typically to implement meta operations.
1619     */
1620    bool is_driver_internal;
1621 
1622    struct v3dv_bo *bo;
1623    /* Current offset at the descriptor bo. 0 means that we didn't use it for
1624     * any descriptor. If the descriptor bo is NULL, current offset is
1625     * meaningless
1626     */
1627    uint32_t current_offset;
1628 
1629    /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set the
1630     * descriptor sets are handled as a whole as pool memory and handled by the
1631     * following pointers. If set, they are not used, and individually
1632     * descriptor sets are allocated/freed.
1633     */
1634    uint8_t *host_memory_base;
1635    uint8_t *host_memory_ptr;
1636    uint8_t *host_memory_end;
1637 
1638    uint32_t entry_count;
1639    uint32_t max_entry_count;
1640    struct v3dv_descriptor_pool_entry entries[0];
1641 };
1642 
1643 struct v3dv_descriptor_set {
1644    struct vk_object_base base;
1645 
1646    struct v3dv_descriptor_pool *pool;
1647 
1648    struct v3dv_descriptor_set_layout *layout;
1649 
1650    /* Offset relative to the descriptor pool bo for this set */
1651    uint32_t base_offset;
1652 
1653    /* The descriptors below can be indexed (set/binding) using the set_layout
1654     */
1655    struct v3dv_descriptor descriptors[0];
1656 };
1657 
struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset in the descriptor bo needs to take set->base_offset into account).
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};
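
/* An illustrative sketch (not driver code) of how the offsets above compose:
 * the location of array element 'i' of a binding inside the pool bo is the
 * set's base offset plus the binding's offset plus the element offset.
 * 'per_descriptor_size' is a hypothetical name for the size reserved per
 * descriptor of this binding's type.
 *
 *    uint32_t bo_offset = set->base_offset +
 *                         binding_layout->descriptor_offset +
 *                         i * per_descriptor_size;
 */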

struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Descriptor set layouts can be destroyed even if they are still being
    * used, so we ref-count them.
    */
   uint32_t ref_cnt;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};


void
v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
                                   struct v3dv_descriptor_set_layout *set_layout);

static inline void
v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
{
   assert(set_layout && set_layout->ref_cnt >= 1);
   p_atomic_inc(&set_layout->ref_cnt);
}

static inline void
v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
                                 struct v3dv_descriptor_set_layout *set_layout)
{
   assert(set_layout && set_layout->ref_cnt >= 1);
   if (p_atomic_dec_zero(&set_layout->ref_cnt))
      v3dv_descriptor_set_layout_destroy(device, set_layout);
}
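
/* Typical usage (an illustrative sketch, not a specific call site): an
 * object that keeps a pointer to the layout takes a reference, and drops it
 * when the object itself is destroyed; the last unref destroys the layout.
 *
 *    v3dv_descriptor_set_layout_ref(set_layout);
 *    ...
 *    v3dv_descriptor_set_layout_unref(device, set_layout);
 */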

struct v3dv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;
};
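
/* An illustrative sketch (not driver code) of the relation between the
 * fields above, assuming set[i].dynamic_offset_start is the running sum of
 * the dynamic offset counts of the preceding set layouts:
 *
 *    uint32_t start = 0;
 *    for (uint32_t i = 0; i < layout->num_sets; i++) {
 *       layout->set[i].dynamic_offset_start = start;
 *       start += layout->set[i].layout->dynamic_offset_count;
 *    }
 *    layout->dynamic_offset_count = start;
 */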

/*
 * We use descriptor maps for ubos/ssbos and textures/samplers, so the map
 * needs to be big enough to include the max value for all of them.
 *
 * FIXME: one alternative would be to allocate each map as big as needed for
 * its descriptor type. That would mean more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
                                 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
                                 MAX_STORAGE_BUFFERS)


struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];
   bool used[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only used for samplers, but this is the easiest
    * place to put it.
    */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
};
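
/* An illustrative sketch (not driver code) of how such a map could be
 * walked to find the slot for a given set/binding/array element, assuming
 * each entry describes one array range of a binding:
 *
 *    for (unsigned i = 0; i < map->num_desc; i++) {
 *       if (map->used[i] &&
 *           map->set[i] == set &&
 *           map->binding[i] == binding &&
 *           array_index >= map->array_index[i] &&
 *           array_index < map->array_index[i] + map->array_size[i])
 *          return i;
 *    }
 */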

struct v3dv_sampler {
   struct vk_object_base base;

   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, which is referenced as part of the tmu
    * configuration. If needed, it will be copied to the descriptor info
    * during UpdateDescriptorSets.
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
};

struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements in the user provided data */
   size_t stride;
};

struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct v3dv_descriptor_template_entry entries[0];
};


/* We keep two special values for the sampler idx that represent exactly the
 * case where a sampler is not needed/provided. The main use is that even if
 * we don't have a sampler, we still need to do the output unpacking (through
 * nir_lower_tex). The easiest way to do this is to add those special "no
 * sampler" entries to the sampler_map, and then use the proper unpacking for
 * each case.
 *
 * We have one for when we want a 16-bit output size, and another for when we
 * want a 32-bit output size. We use the info coming from the RelaxedPrecision
 * decoration to decide between one and the other.
 */
#define V3DV_NO_SAMPLER_16BIT_IDX 0
#define V3DV_NO_SAMPLER_32BIT_IDX 1
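
/* An illustrative sketch (not driver code) of the selection described above,
 * assuming RelaxedPrecision maps to the 16-bit return size:
 *
 *    uint32_t sampler_idx = relaxed_precision ? V3DV_NO_SAMPLER_16BIT_IDX
 *                                             : V3DV_NO_SAMPLER_32BIT_IDX;
 */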

/*
 * The following two methods are used with the combined texture/sampler index
 * maps at v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
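
/* Illustrative round trip: the key packs the texture index in the top 8
 * bits and the sampler index in the low 24 bits.
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(tex, samp);
 *    uint32_t tex_out, samp_out;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex_out, &samp_out);
 *    assert(tex_out == tex && samp_out == samp);
 */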

struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* This structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref-count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. Note these will be freed once the pipeline
    * has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding, so vb[binding].stride is the stride of the vertex
    * array with that binding.
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing the info that we need to
    * recheck later. The array must be indexed by driver location, since that
    * is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* The combined SHA1 of all the stages, plus the pipeline key SHA1. */
   unsigned char sha1[20];

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved by
    * uploading it to a common BO. But as in most cases it will be NULL, it
    * is not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* Whether the pipeline should emit any of the stencil configuration
    * packets.
    */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   /* Packets prepacked during pipeline creation */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state*
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}

const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

#define v3dv_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))

const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
const struct v3dv_format *
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
                               uint32_t bpp, VkFormat *out_vk_format);
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
                                          VkFormat vk_format,
                                          VkFormatFeatureFlags features);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline *pipeline,
                                         struct v3dv_shader_variant *variant);

struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline *pipeline,
                                                    struct v3dv_shader_variant *variant,
                                                    uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
v3dv_shader_variant_destroy(struct v3dv_device *device,
                            struct v3dv_shader_variant *variant);

static inline void
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   p_atomic_inc(&shared_data->ref_cnt);
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data);

static inline void
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
                                struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data && shared_data->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shared_data->ref_cnt))
      v3dv_pipeline_shared_data_destroy(device, shared_data);
}

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index,
                                      VkDescriptorType *out_type);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

struct v3dv_bo *
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}
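
/* Illustrative usage (a sketch, assuming one sampler is stored per array
 * element of the binding):
 *
 *    const struct v3dv_sampler *samplers =
 *       v3dv_immutable_samplers(set_layout, binding_layout);
 *    const struct v3dv_sampler *sampler = &samplers[array_index];
 */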

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              VkPipelineCacheCreateFlags,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

void v3dv_shader_module_internal_init(struct v3dv_device *device,
                                      struct vk_shader_module *module,
                                      nir_shader *nir);

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   VK_FROM_HANDLE(__v3dv_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
                               VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
                               VK_OBJECT_TYPE_SEMAPHORE)

static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}
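
/* Illustrative usage (a sketch, not a specific call site): issue a v3d GEM
 * ioctl through the wrapper so the call is routed to the simulator when it
 * is in use.
 *
 *    struct drm_v3d_wait_bo wait = {
 *       .handle = bo_handle,
 *       .timeout_ns = INT64_MAX,
 *    };
 *    int ret = v3dv_ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
 */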

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)
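
/* Typical usage (illustrative): bail out early from command recording
 * helpers once an OOM condition has been flagged.
 *
 *    v3dv_return_if_oom(cmd_buffer, NULL);
 */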

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}

/* Helper to call hw ver specific functions */
#define v3dv_X(device, thing) ({                      \
   __typeof(&v3d42_##thing) v3d_X_thing;              \
   switch (device->devinfo.ver) {                     \
   case 42:                                           \
      v3d_X_thing = &v3d42_##thing;                   \
      break;                                          \
   default:                                           \
      unreachable("Unsupported hardware generation"); \
   }                                                  \
   v3d_X_thing;                                       \
})
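
/* Illustrative usage (a sketch; 'job_emit_binning_flush' stands in for any
 * per-version entry point): the macro resolves to the function pointer for
 * the device's hardware version, which is then called directly.
 *
 *    v3dv_X(device, job_emit_binning_flush)(job);
 */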

/* The v3d_macros header from common requires v3dX and V3DX definitions.
 * Below we need to define v3dX for each supported version, because when we
 * compile code that is not version-specific, all version-specific macros
 * need to be already defined.
 */
#ifdef v3dX
#  include "v3dvx_private.h"
#else
#  define v3dX(x) v3d42_##x
#  include "v3dvx_private.h"
#  undef v3dX
#endif

#ifdef ANDROID
VkResult
v3dv_gralloc_info(struct v3dv_device *device,
                  const VkNativeBufferANDROID *gralloc_info,
                  int *out_dmabuf,
                  int *out_stride,
                  int *out_size,
                  uint64_t *out_modifier);

VkResult
v3dv_import_native_buffer_fd(VkDevice device_h,
                             int dma_buf,
                             const VkAllocationCallbacks *alloc,
                             VkImage image_h);
#endif /* ANDROID */

#endif /* V3DV_PRIVATE_H */