/*
 * Copyright © 2016 Red Hat
 * based on intel anv code:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_meta.h"

#include "vk_util.h"

#include <fcntl.h>
#include <limits.h>
#ifndef _WIN32
#include <pwd.h>
#endif
#include <sys/stat.h>

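/**
 * Save the pieces of command buffer state that a meta operation may clobber:
 * the bound pipeline and (optionally) all dynamic state, sample locations,
 * descriptor set 0, push constants, and the current render pass state.
 * Callers select what to save with RADV_META_SAVE_* flags and undo the
 * damage afterwards with radv_meta_restore().
 */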
void
radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer,
               uint32_t flags)
{
   VkPipelineBindPoint bind_point = flags & RADV_META_SAVE_GRAPHICS_PIPELINE
                                       ? VK_PIPELINE_BIND_POINT_GRAPHICS
                                       : VK_PIPELINE_BIND_POINT_COMPUTE;
   struct radv_descriptor_state *descriptors_state =
      radv_get_descriptors_state(cmd_buffer, bind_point);

   assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_COMPUTE_PIPELINE));

   state->flags = flags;

   if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
      assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));

      state->old_pipeline = cmd_buffer->state.pipeline;

      /* Save all viewports. */
      state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
      typed_memcpy(state->viewport.viewports, cmd_buffer->state.dynamic.viewport.viewports,
                   MAX_VIEWPORTS);
      typed_memcpy(state->viewport.xform, cmd_buffer->state.dynamic.viewport.xform,
                   MAX_VIEWPORTS);

      /* Save all scissors. */
      state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
      typed_memcpy(state->scissor.scissors, cmd_buffer->state.dynamic.scissor.scissors,
                   MAX_SCISSORS);

      state->cull_mode = cmd_buffer->state.dynamic.cull_mode;
      state->front_face = cmd_buffer->state.dynamic.front_face;

      state->primitive_topology = cmd_buffer->state.dynamic.primitive_topology;

      state->depth_test_enable = cmd_buffer->state.dynamic.depth_test_enable;
      state->depth_write_enable = cmd_buffer->state.dynamic.depth_write_enable;
      state->depth_compare_op = cmd_buffer->state.dynamic.depth_compare_op;
      state->depth_bounds_test_enable = cmd_buffer->state.dynamic.depth_bounds_test_enable;
      state->stencil_test_enable = cmd_buffer->state.dynamic.stencil_test_enable;

      state->stencil_op.front.compare_op = cmd_buffer->state.dynamic.stencil_op.front.compare_op;
      state->stencil_op.front.fail_op = cmd_buffer->state.dynamic.stencil_op.front.fail_op;
      state->stencil_op.front.pass_op = cmd_buffer->state.dynamic.stencil_op.front.pass_op;
      state->stencil_op.front.depth_fail_op =
         cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op;

      state->stencil_op.back.compare_op = cmd_buffer->state.dynamic.stencil_op.back.compare_op;
      state->stencil_op.back.fail_op = cmd_buffer->state.dynamic.stencil_op.back.fail_op;
      state->stencil_op.back.pass_op = cmd_buffer->state.dynamic.stencil_op.back.pass_op;
      state->stencil_op.back.depth_fail_op =
         cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op;

      state->fragment_shading_rate.size = cmd_buffer->state.dynamic.fragment_shading_rate.size;
      state->fragment_shading_rate.combiner_ops[0] =
         cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0];
      state->fragment_shading_rate.combiner_ops[1] =
         cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1];

      state->depth_bias_enable = cmd_buffer->state.dynamic.depth_bias_enable;

      state->primitive_restart_enable = cmd_buffer->state.dynamic.primitive_restart_enable;

      state->rasterizer_discard_enable = cmd_buffer->state.dynamic.rasterizer_discard_enable;

      state->logic_op = cmd_buffer->state.dynamic.logic_op;

      state->color_write_enable = cmd_buffer->state.dynamic.color_write_enable;
   }

   if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
      typed_memcpy(&state->sample_location, &cmd_buffer->state.dynamic.sample_location, 1);
   }

   if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
      assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));

      state->old_pipeline = cmd_buffer->state.compute_pipeline;
   }

   if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
      state->old_descriptor_set0 = descriptors_state->sets[0];
      if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0)
         state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
   }

   if (state->flags & RADV_META_SAVE_CONSTANTS) {
      memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE);
   }

   if (state->flags & RADV_META_SAVE_PASS) {
      state->pass = cmd_buffer->state.pass;
      state->subpass = cmd_buffer->state.subpass;
      state->framebuffer = cmd_buffer->state.framebuffer;
      state->attachments = cmd_buffer->state.attachments;
      state->render_area = cmd_buffer->state.render_area;
   }
}

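/**
 * Restore the state captured by radv_meta_save(): re-bind the saved pipeline,
 * put back the dynamic state (marking it dirty so it gets re-emitted), and
 * restore descriptor set 0, push constants, and the render pass state.
 */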
void
radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
   VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE
                                       ? VK_PIPELINE_BIND_POINT_GRAPHICS
                                       : VK_PIPELINE_BIND_POINT_COMPUTE;

   if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
      radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
                           radv_pipeline_to_handle(state->old_pipeline));

      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;

      /* Restore all viewports. */
      cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
      typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports, state->viewport.viewports,
                   MAX_VIEWPORTS);
      typed_memcpy(cmd_buffer->state.dynamic.viewport.xform, state->viewport.xform,
                   MAX_VIEWPORTS);

      /* Restore all scissors. */
      cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
      typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors, state->scissor.scissors,
                   MAX_SCISSORS);

      cmd_buffer->state.dynamic.cull_mode = state->cull_mode;
      cmd_buffer->state.dynamic.front_face = state->front_face;

      cmd_buffer->state.dynamic.primitive_topology = state->primitive_topology;

      cmd_buffer->state.dynamic.depth_test_enable = state->depth_test_enable;
      cmd_buffer->state.dynamic.depth_write_enable = state->depth_write_enable;
      cmd_buffer->state.dynamic.depth_compare_op = state->depth_compare_op;
      cmd_buffer->state.dynamic.depth_bounds_test_enable = state->depth_bounds_test_enable;
      cmd_buffer->state.dynamic.stencil_test_enable = state->stencil_test_enable;

      cmd_buffer->state.dynamic.stencil_op.front.compare_op = state->stencil_op.front.compare_op;
      cmd_buffer->state.dynamic.stencil_op.front.fail_op = state->stencil_op.front.fail_op;
      cmd_buffer->state.dynamic.stencil_op.front.pass_op = state->stencil_op.front.pass_op;
      cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op =
         state->stencil_op.front.depth_fail_op;

      cmd_buffer->state.dynamic.stencil_op.back.compare_op = state->stencil_op.back.compare_op;
      cmd_buffer->state.dynamic.stencil_op.back.fail_op = state->stencil_op.back.fail_op;
      cmd_buffer->state.dynamic.stencil_op.back.pass_op = state->stencil_op.back.pass_op;
      cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op =
         state->stencil_op.back.depth_fail_op;

      cmd_buffer->state.dynamic.fragment_shading_rate.size = state->fragment_shading_rate.size;
      cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0] =
         state->fragment_shading_rate.combiner_ops[0];
      cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1] =
         state->fragment_shading_rate.combiner_ops[1];

      cmd_buffer->state.dynamic.depth_bias_enable = state->depth_bias_enable;

      cmd_buffer->state.dynamic.primitive_restart_enable = state->primitive_restart_enable;

      cmd_buffer->state.dynamic.rasterizer_discard_enable = state->rasterizer_discard_enable;

      cmd_buffer->state.dynamic.logic_op = state->logic_op;

      cmd_buffer->state.dynamic.color_write_enable = state->color_write_enable;

      cmd_buffer->state.dirty |=
         RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_SCISSOR |
         RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
         RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
         RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
         RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
         RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
         RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |
         RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE |
         RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
         RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE;
   }

   if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
      typed_memcpy(&cmd_buffer->state.dynamic.sample_location.locations,
                   &state->sample_location.locations, 1);

      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
   }

   if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
      radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                           radv_pipeline_to_handle(state->old_pipeline));
   }

   if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
      radv_set_descriptor_set(cmd_buffer, bind_point, state->old_descriptor_set0, 0);
   }

   if (state->flags & RADV_META_SAVE_CONSTANTS) {
      VkShaderStageFlags stages = VK_SHADER_STAGE_COMPUTE_BIT;

      if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
         stages |= VK_SHADER_STAGE_ALL_GRAPHICS;

      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0,
                            MAX_PUSH_CONSTANTS_SIZE, state->push_constants);
   }

   if (state->flags & RADV_META_SAVE_PASS) {
      cmd_buffer->state.pass = state->pass;
      cmd_buffer->state.subpass = state->subpass;
      cmd_buffer->state.framebuffer = state->framebuffer;
      cmd_buffer->state.attachments = state->attachments;
      cmd_buffer->state.render_area = state->render_area;
      if (state->subpass)
         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
   }
}

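/* Return the view type that matches an image's type 1:1. */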
VkImageViewType
radv_meta_get_view_type(const struct radv_image *image)
{
   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
      return VK_IMAGE_VIEW_TYPE_1D;
   case VK_IMAGE_TYPE_2D:
      return VK_IMAGE_VIEW_TYPE_2D;
   case VK_IMAGE_TYPE_3D:
      return VK_IMAGE_VIEW_TYPE_3D;
   default:
      unreachable("bad VkImageType");
   }
}

/**
 * When creating a destination VkImageView, this function provides the needed
 * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
 */
uint32_t
radv_meta_get_iview_layer(const struct radv_image *dest_image,
                          const VkImageSubresourceLayers *dest_subresource,
                          const VkOffset3D *dest_offset)
{
   switch (dest_image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      return dest_subresource->baseArrayLayer;
   case VK_IMAGE_TYPE_3D:
      /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
       * but meta does it anyway. When doing so, we translate the
       * destination's z offset into an array offset.
       */
      return dest_offset->z;
   default:
      assert(!"bad VkImageType");
      return 0;
   }
}

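/* Thin wrappers that forward to the device's allocation callbacks with
 * device scope; installed as meta_state.alloc (and hence as the allocator
 * for the meta pipeline cache) in radv_device_init_meta().
 */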
static void *
meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
   struct radv_device *device = _device;
   return device->vk.alloc.pfnAllocation(device->vk.alloc.pUserData, size, alignment,
                                         VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void *
meta_realloc(void *_device, void *original, size_t size, size_t alignment,
             VkSystemAllocationScope allocationScope)
{
   struct radv_device *device = _device;
   return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original, size, alignment,
                                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void
meta_free(void *_device, void *data)
{
   struct radv_device *device = _device;
   device->vk.alloc.pfnFree(device->vk.alloc.pUserData, data);
}

#ifndef _WIN32
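/* Build the per-user path of the on-disk builtin shader cache: prefer
 * $XDG_CACHE_HOME, otherwise fall back to ~/.cache (creating it if missing).
 * The pointer size in bits is appended so that 32-bit and 64-bit builds do
 * not share a cache file.
 */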
static bool
radv_builtin_cache_path(char *path)
{
   char *xdg_cache_home = getenv("XDG_CACHE_HOME");
   const char *suffix = "/radv_builtin_shaders";
   const char *suffix2 = "/.cache/radv_builtin_shaders";
   struct passwd pwd, *result;
   char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max, but suffices here. */
   int ret;

   if (xdg_cache_home) {
      ret = snprintf(path, PATH_MAX + 1, "%s%s%zd", xdg_cache_home, suffix, sizeof(void *) * 8);
      return ret > 0 && ret < PATH_MAX + 1;
   }

   getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
   if (!result)
      return false;

   strcpy(path, pwd.pw_dir);
   strcat(path, "/.cache");
   if (mkdir(path, 0755) && errno != EEXIST)
      return false;

   ret = snprintf(path, PATH_MAX + 1, "%s%s%zd", pwd.pw_dir, suffix2, sizeof(void *) * 8);
   return ret > 0 && ret < PATH_MAX + 1;
}
#endif

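/* Seed the meta pipeline cache from the on-disk builtin shader cache, if
 * present. Returns true on success.
 */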
static bool
radv_load_meta_pipeline(struct radv_device *device)
{
#ifdef _WIN32
   return false;
#else
   char path[PATH_MAX + 1];
   struct stat st;
   void *data = NULL;
   bool ret = false;

   if (!radv_builtin_cache_path(path))
      return false;

   int fd = open(path, O_RDONLY);
   if (fd < 0)
      return false;
   if (fstat(fd, &st))
      goto fail;
   data = malloc(st.st_size);
   if (!data)
      goto fail;
   if (read(fd, data, st.st_size) == -1)
      goto fail;

   ret = radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
fail:
   free(data);
   close(fd);
   return ret;
#endif
}

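/* Write the meta pipeline cache back to disk if it changed. The data is
 * written to a mkstemp() temporary and rename()d over the final path so a
 * concurrent reader never sees a partially written cache.
 */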
static void
radv_store_meta_pipeline(struct radv_device *device)
{
#ifndef _WIN32
   char path[PATH_MAX + 1], path2[PATH_MAX + 7];
   size_t size;
   void *data = NULL;

   if (!device->meta_state.cache.modified)
      return;

   if (radv_GetPipelineCacheData(radv_device_to_handle(device),
                                 radv_pipeline_cache_to_handle(&device->meta_state.cache), &size,
                                 NULL))
      return;

   if (!radv_builtin_cache_path(path))
      return;

   strcpy(path2, path);
   strcat(path2, "XXXXXX");
   int fd = mkstemp(path2); // open(path, O_WRONLY | O_CREAT, 0600);
   if (fd < 0)
      return;
   data = malloc(size);
   if (!data)
      goto fail;

   if (radv_GetPipelineCacheData(radv_device_to_handle(device),
                                 radv_pipeline_cache_to_handle(&device->meta_state.cache), &size,
                                 data))
      goto fail;
   if (write(fd, data, size) == -1)
      goto fail;

   rename(path2, path);
fail:
   free(data);
   close(fd);
   unlink(path2);
#endif
}

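/* Create the device-level meta state. If the on-disk cache could not be
 * loaded, most meta pipelines are created lazily (on_demand), presumably to
 * keep device creation cheap when every pipeline would have to be compiled
 * from scratch; with a warm cache they are built up front.
 */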
VkResult
radv_device_init_meta(struct radv_device *device)
{
   VkResult result;

   memset(&device->meta_state, 0, sizeof(device->meta_state));

   device->meta_state.alloc = (VkAllocationCallbacks){
      .pUserData = device,
      .pfnAllocation = meta_alloc,
      .pfnReallocation = meta_realloc,
      .pfnFree = meta_free,
   };

   device->meta_state.cache.alloc = device->meta_state.alloc;
   radv_pipeline_cache_init(&device->meta_state.cache, device);
   bool loaded_cache = radv_load_meta_pipeline(device);
   bool on_demand = !loaded_cache;

   mtx_init(&device->meta_state.mtx, mtx_plain);

   result = radv_device_init_meta_clear_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_clear;

   result = radv_device_init_meta_resolve_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_resolve;

   result = radv_device_init_meta_blit_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_blit;

   result = radv_device_init_meta_blit2d_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_blit2d;

   result = radv_device_init_meta_bufimage_state(device);
   if (result != VK_SUCCESS)
      goto fail_bufimage;

   result = radv_device_init_meta_depth_decomp_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_depth_decomp;

   result = radv_device_init_meta_buffer_state(device);
   if (result != VK_SUCCESS)
      goto fail_buffer;

   result = radv_device_init_meta_query_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_query;

   result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_fast_clear;

   result = radv_device_init_meta_resolve_compute_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_resolve_compute;

   result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
   if (result != VK_SUCCESS)
      goto fail_resolve_fragment;

   result = radv_device_init_meta_fmask_expand_state(device);
   if (result != VK_SUCCESS)
      goto fail_fmask_expand;

   result = radv_device_init_accel_struct_build_state(device);
   if (result != VK_SUCCESS)
      goto fail_accel_struct_build;

   return VK_SUCCESS;

fail_accel_struct_build:
   radv_device_finish_meta_fmask_expand_state(device);
fail_fmask_expand:
   radv_device_finish_meta_resolve_fragment_state(device);
fail_resolve_fragment:
   radv_device_finish_meta_resolve_compute_state(device);
fail_resolve_compute:
   radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
   radv_device_finish_meta_query_state(device);
fail_query:
   radv_device_finish_meta_buffer_state(device);
fail_buffer:
   radv_device_finish_meta_depth_decomp_state(device);
fail_depth_decomp:
   radv_device_finish_meta_bufimage_state(device);
fail_bufimage:
   radv_device_finish_meta_blit2d_state(device);
fail_blit2d:
   radv_device_finish_meta_blit_state(device);
fail_blit:
   radv_device_finish_meta_resolve_state(device);
fail_resolve:
   radv_device_finish_meta_clear_state(device);
fail_clear:
   mtx_destroy(&device->meta_state.mtx);
   radv_pipeline_cache_finish(&device->meta_state.cache);
   return result;
}

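/* Tear down all meta state, including the DCC-retile and VRS-HTILE state
 * that radv_device_init_meta() does not create up front, then persist the
 * pipeline cache to disk.
 */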
void
radv_device_finish_meta(struct radv_device *device)
{
   radv_device_finish_accel_struct_build_state(device);
   radv_device_finish_meta_clear_state(device);
   radv_device_finish_meta_resolve_state(device);
   radv_device_finish_meta_blit_state(device);
   radv_device_finish_meta_blit2d_state(device);
   radv_device_finish_meta_bufimage_state(device);
   radv_device_finish_meta_depth_decomp_state(device);
   radv_device_finish_meta_query_state(device);
   radv_device_finish_meta_buffer_state(device);
   radv_device_finish_meta_fast_clear_flush_state(device);
   radv_device_finish_meta_resolve_compute_state(device);
   radv_device_finish_meta_resolve_fragment_state(device);
   radv_device_finish_meta_fmask_expand_state(device);
   radv_device_finish_meta_dcc_retile_state(device);
   radv_device_finish_meta_copy_vrs_htile_state(device);

   radv_store_meta_pipeline(device);
   radv_pipeline_cache_finish(&device->meta_state.cache);
   mtx_destroy(&device->meta_state.mtx);
}

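/* Emit the position of one vertex of a full-screen rectangle, selected by
 * the vertex index. Only three corners are emitted; meta pipelines draw
 * these with RADV's internal rect-list topology, so the hardware derives
 * the fourth corner.
 */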
nir_ssa_def *
radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
{
   nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(vs_b);

   /* vertex 0 - -1.0, -1.0 */
   /* vertex 1 - -1.0, 1.0 */
   /* vertex 2 - 1.0, -1.0 */
   /* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
      channel 1 is vertex_id != 1 ? -1.0 : 1.0 */

   nir_ssa_def *c0cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 2));
   nir_ssa_def *c1cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 1));

   nir_ssa_def *comp[4];
   comp[0] = nir_bcsel(vs_b, c0cmp, nir_imm_float(vs_b, -1.0), nir_imm_float(vs_b, 1.0));

   comp[1] = nir_bcsel(vs_b, c1cmp, nir_imm_float(vs_b, -1.0), nir_imm_float(vs_b, 1.0));
   comp[2] = comp2;
   comp[3] = nir_imm_float(vs_b, 1.0);
   nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);

   return outvec;
}

nir_ssa_def *
radv_meta_gen_rect_vertices(nir_builder *vs_b)
{
   return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
}

/* vertex shader that generates vertices */
nir_shader *
radv_meta_build_nir_vs_generate_vertices(void)
{
   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *v_position;

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_vs_gen_verts");

   nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);

   v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
   v_position->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, v_position, outvec, 0xf);

   return b.shader;
}

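/* Fragment shader with no outputs or side effects, for meta draws that do
 * not need to write color.
 */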
nir_shader *
radv_meta_build_nir_fs_noop(void)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_noop_fs");

   return b.shader;
}

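/* Emit the body of an MSAA resolve shader: fetch sample 0, and for
 * non-integer formats with more than one sample, ask samples_identical
 * whether the remaining samples can differ; only when they can, fetch them
 * all and store the average instead.
 */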
void
radv_meta_build_resolve_shader_core(nir_builder *b, bool is_integer, int samples,
                                    nir_variable *input_img, nir_variable *color,
                                    nir_ssa_def *img_coord)
{
   /* do a txf_ms on each sample */
   nir_ssa_def *tmp;
   bool inserted_if = false;

   nir_ssa_def *input_img_deref = &nir_build_deref_var(b, input_img)->dest.ssa;

   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(img_coord);
   tex->src[1].src_type = nir_tex_src_ms_index;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = nir_type_float32;
   tex->is_array = false;
   tex->coord_components = 2;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &tex->instr);

   tmp = &tex->dest.ssa;

   if (!is_integer && samples > 1) {
      nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 2);
      tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
      tex_all_same->op = nir_texop_samples_identical;
      tex_all_same->src[0].src_type = nir_tex_src_coord;
      tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
      tex_all_same->src[1].src_type = nir_tex_src_texture_deref;
      tex_all_same->src[1].src = nir_src_for_ssa(input_img_deref);
      tex_all_same->dest_type = nir_type_bool1;
      tex_all_same->is_array = false;
      tex_all_same->coord_components = 2;

      nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 1, "tex");
      nir_builder_instr_insert(b, &tex_all_same->instr);

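      /* samples_identical is false when the samples may differ, so despite
       * its name, all_same is true exactly when a full per-sample average
       * is required.
       */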
      nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_bool(b, false));
      nir_push_if(b, all_same);
      for (int i = 1; i < samples; i++) {
         nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 3);
         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
         tex_add->op = nir_texop_txf_ms;
         tex_add->src[0].src_type = nir_tex_src_coord;
         tex_add->src[0].src = nir_src_for_ssa(img_coord);
         tex_add->src[1].src_type = nir_tex_src_ms_index;
         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
         tex_add->src[2].src_type = nir_tex_src_texture_deref;
         tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
         tex_add->dest_type = nir_type_float32;
         tex_add->is_array = false;
         tex_add->coord_components = 2;

         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
         nir_builder_instr_insert(b, &tex_add->instr);

         tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
      }

      tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
      nir_store_var(b, color, tmp, 0xf);
      nir_push_else(b, NULL);
      inserted_if = true;
   }
   nir_store_var(b, color, &tex->dest.ssa, 0xf);

   if (inserted_if)
      nir_pop_if(b, NULL);
}

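/* Build a vulkan_resource_index intrinsic for (desc_set, binding) and return
 * its first two channels, which is the descriptor handle consumed by the
 * later load/store intrinsics in meta shaders.
 */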
nir_ssa_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
   nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set,
                                                 .binding = binding);
   return nir_channels(b, rsrc, 0x3);
}

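/* Compute the first num_components of the global invocation ID:
 * global_id = workgroup_id * workgroup_size + local_invocation_id.
 */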
nir_ssa_def *
get_global_ids(nir_builder *b, unsigned num_components)
{
   unsigned mask = BITFIELD_MASK(num_components);

   nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
   nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
   nir_ssa_def *block_size = nir_channels(
      b,
      nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
                    b->shader->info.workgroup_size[2], 0),
      mask);

   return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
}