1 /*
2  * This file is part of libplacebo.
3  *
4  * libplacebo is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * libplacebo is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #pragma once
19 
20 #include "common.h"
21 #include "command.h"
22 #include "formats.h"
23 #include "malloc.h"
24 #include "utils.h"
25 
26 #include "../gpu.h"
27 #include "../pl_thread.h"
28 
// Create a pl_gpu instance on top of an initialized Vulkan context.
// (Presumably returns NULL on failure — confirm in the implementation.)
pl_gpu pl_gpu_create_vk(struct vk_ctx *vk);

// This function takes the current graphics command and steals it from the
// GPU, so the caller can do custom vk_cmd_ calls on it. The caller should
// submit it as well.
struct vk_cmd *pl_vk_steal_cmd(pl_gpu gpu);

// Print memory usage statistics (at the given log level)
void pl_vk_print_heap(pl_gpu, enum pl_log_level);
38 
39 // --- pl_gpu internal structs and helpers
40 
// Per-format backend state. NOTE(review): presumably stored in pl_fmt.priv —
// confirm against the format initialization code.
struct pl_fmt_vk {
    const struct vk_format *vk_fmt; // underlying vulkan format description
    bool blit_emulated; // blits for this format require emulation (see vk_tex_blit) — verify
};
45 
// Abstract queue (family) selector, used to pick which queue a command is
// recorded/submitted on (see CMD_BEGIN / struct pl_tex_vk.transfer_queue).
// ANY presumably expresses "no preference" — confirm in _begin_cmd.
enum queue_type {
    GRAPHICS,
    COMPUTE,
    TRANSFER,
    ANY,
};
52 
// Backend-internal state for a Vulkan-backed pl_gpu.
// NOTE(review): presumably stored in pl_gpu.priv — confirm in pl_gpu_create_vk.
struct pl_vk {
    struct pl_gpu_fns impl;       // dispatch table of backend entry points
    struct vk_ctx *vk;            // underlying vulkan context (ownership unclear from here — verify)
    struct vk_malloc *alloc;      // device memory allocator (see malloc.h)
    struct spirv_compiler *spirv; // shader -> SPIR-V compiler instance

    // Some additional cached device limits and features checks
    uint32_t max_push_descriptors; // VK_KHR_push_descriptor limit (0 presumably meaning unsupported — verify)
    size_t min_texel_alignment;    // minimum alignment for texel buffer offsets
    bool host_query_reset;         // hostQueryReset feature available

    // This is a pl_dispatch used (on ourselves!) for the purposes of
    // dispatching compute shaders for performing various emulation tasks
    // (e.g. partial clears, blits or emulated texture transfers).
    // Warning: Care must be taken to avoid recursive calls.
    pl_dispatch dp;

    // The "currently recording" command. This will be queued and replaced by
    // a new command every time we need to "switch" between queue families.
    pl_mutex recording;  // guards `cmd` (and presumably `cmd_timer`) — verify locking discipline in gpu.c
    struct vk_cmd *cmd;
    pl_timer cmd_timer;

    // Array of VkSamplers for every combination of sample/address modes
    VkSampler samplers[PL_TEX_SAMPLE_MODE_COUNT][PL_TEX_ADDRESS_MODE_COUNT];

    // To avoid spamming warnings
    bool warned_modless;
};
82 
// Low-level command begin/end helpers backing the CMD_* convenience macros
// below. `_begin_cmd` acquires a recording command for the given queue type
// (labelled with the calling function's name, optionally timed); `_end_cmd`
// finishes it, submitting immediately iff `submit` is set.
// NOTE(review): file-scope identifiers with a leading underscore are reserved
// by the C standard; renaming would touch other translation units, so only
// flagging it here.
struct vk_cmd *_begin_cmd(pl_gpu, enum queue_type, const char *label, pl_timer);
void _end_cmd(pl_gpu, struct vk_cmd **, bool submit);

// These macros expect a local `gpu` variable to be in scope at the call site.
#define CMD_BEGIN(type)              _begin_cmd(gpu, type, __func__, NULL)
#define CMD_BEGIN_TIMED(type, timer) _begin_cmd(gpu, type, __func__, timer)
#define CMD_FINISH(cmd) _end_cmd(gpu, cmd, false)
#define CMD_SUBMIT(cmd) _end_cmd(gpu, cmd, true)
90 
// Backend-internal state for a Vulkan-backed pl_tex.
struct pl_tex_vk {
    pl_rc_t rc;            // reference count (see vk_tex_deref)
    bool held;             // texture is currently held by the user/external API? — verify
    bool external_img;     // VkImage is externally owned, presumably not destroyed by us — verify
    bool may_invalidate;   // contents may be discarded (see vk_tex_invalidate)
    enum queue_type transfer_queue; // preferred queue for upload/download
    VkImageType type;      // 1D/2D/3D image type
    VkImage img;
    struct vk_memslice mem; // backing device memory allocation
    // cached properties
    VkFormat img_fmt;
    VkImageUsageFlags usage_flags;
    // for sampling
    VkImageView view;
    // for rendering
    VkFramebuffer framebuffer;
    // for vk_tex_upload/download fallback code
    pl_fmt texel_fmt;
    // "current" metadata, can change during the course of execution
    VkImageLayout current_layout;
    VkAccessFlags current_access;
    // the signal guards reuse, and can be NULL
    struct vk_signal *sig;
    VkPipelineStageFlags sig_stage; // pipeline stage the signal was issued at (see vk_tex_signal)
    PL_ARRAY(VkSemaphore) ext_deps; // external semaphore, not owned by the pl_tex
    pl_sync ext_sync; // indicates an exported image
};
118 
// pl_tex backend entry points (filled into struct pl_gpu_fns, presumably —
// confirm in gpu.c). The bool-returning transfer/poll/export functions
// presumably report success/failure — verify against the pl_gpu API contract.
pl_tex vk_tex_create(pl_gpu, const struct pl_tex_params *);
void vk_tex_deref(pl_gpu, pl_tex);
void vk_tex_invalidate(pl_gpu, pl_tex);
void vk_tex_clear_ex(pl_gpu, pl_tex, const union pl_clear_color);
void vk_tex_blit(pl_gpu, const struct pl_tex_blit_params *);
bool vk_tex_upload(pl_gpu, const struct pl_tex_transfer_params *);
bool vk_tex_download(pl_gpu, const struct pl_tex_transfer_params *);
bool vk_tex_poll(pl_gpu, pl_tex, uint64_t timeout);
bool vk_tex_export(pl_gpu, pl_tex, pl_sync);

// Small helper to ease image barrier creation. if `discard` is set, the
// contents of the image will be undefined after the barrier
// NOTE(review): the comment mentions a `discard` parameter, but the prototype
// names its final bool `export` — one of the two looks stale; reconcile with
// the definition.
void vk_tex_barrier(pl_gpu, struct vk_cmd *, pl_tex, VkPipelineStageFlags,
                    VkAccessFlags, VkImageLayout, bool export);
// Record a reuse-guard signal for the texture at the given pipeline stage
// (see struct pl_tex_vk.sig / sig_stage)
void vk_tex_signal(pl_gpu, struct vk_cmd *, pl_tex, VkPipelineStageFlags);
134 
// Backend-internal state for a Vulkan-backed pl_buf.
struct pl_buf_vk {
    struct vk_memslice mem; // backing device memory slice
    pl_rc_t rc;             // reference count (see vk_buf_deref)
    int writes; // number of queued write commands
    enum queue_type update_queue; // preferred queue for buffer updates
    VkBufferView view; // for texel buffers
    // "current" metadata, can change during course of execution
    VkAccessFlags current_access;
    bool exported;    // buffer has been exported (see vk_buf_export)
    bool needs_flush; // pending host writes to flush (see vk_buf_flush)
    // the signal guards reuse, and can be NULL
    struct vk_signal *sig;
    VkPipelineStageFlags sig_stage; // pipeline stage the signal was issued at (see vk_buf_signal)
};
149 
// pl_buf backend entry points (filled into struct pl_gpu_fns, presumably —
// confirm in gpu.c). The bool-returning read/export/poll functions presumably
// report success/failure — verify against the pl_gpu API contract.
pl_buf vk_buf_create(pl_gpu, const struct pl_buf_params *);
void vk_buf_deref(pl_gpu, pl_buf);
void vk_buf_write(pl_gpu, pl_buf, size_t offset, const void *src, size_t size);
bool vk_buf_read(pl_gpu, pl_buf, size_t offset, void *dst, size_t size);
void vk_buf_copy(pl_gpu, pl_buf dst, size_t dst_offset,
                 pl_buf src, size_t src_offset, size_t size);
bool vk_buf_export(pl_gpu, pl_buf);
bool vk_buf_poll(pl_gpu, pl_buf, uint64_t timeout);
158 
// Bitmask describing how a buffer is about to be accessed; combinable flags,
// consumed by vk_buf_barrier below.
enum buffer_op {
    BUF_READ    = (1 << 0),
    BUF_WRITE   = (1 << 1),
    BUF_EXPORT  = (1 << 2),
};
164 
// Helpers to ease buffer barrier creation. (`offset` is relative to pl_buf)
void vk_buf_barrier(pl_gpu, struct vk_cmd *, pl_buf, VkPipelineStageFlags,
                    VkAccessFlags, size_t offset, size_t size, enum buffer_op);
// Record a reuse-guard signal for the buffer at the given pipeline stage
// (see struct pl_buf_vk.sig / sig_stage)
void vk_buf_signal(pl_gpu, struct vk_cmd *, pl_buf, VkPipelineStageFlags);

// Flush visible writes to a buffer made by the API
void vk_buf_flush(pl_gpu, struct vk_cmd *, pl_buf, size_t offset, size_t size);

// Opaque here; presumably defined in the pass implementation file.
struct pl_pass_vk;

// pl_pass (render/compute pass) backend entry points
int vk_desc_namespace(pl_gpu, enum pl_desc_type);
pl_pass vk_pass_create(pl_gpu, const struct pl_pass_params *);
void vk_pass_destroy(pl_gpu, pl_pass);
void vk_pass_run(pl_gpu, const struct pl_pass_run_params *);
179 
// Backend-internal state for a pl_sync: a refcounted pair of semaphores used
// for synchronization with external APIs (see vk_tex_export).
struct pl_sync_vk {
    pl_rc_t rc;         // reference count (see vk_sync_deref)
    VkSemaphore wait;   // NOTE(review): which side waits/signals each semaphore
    VkSemaphore signal; //   is not visible here — confirm against the implementation
};

// Decrement the sync object's refcount (presumably destroying it at zero —
// confirm in the implementation)
void vk_sync_deref(pl_gpu, pl_sync);
187