/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "compiler/nir/nir_builder.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"
#include "vulkan/util/vk_common_entrypoints.h"

static uint32_t
meta_blit_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}

static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}

static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkResult result;

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &descriptor_set_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &descriptor_set_layout_info,
                                        &device->vk.alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);
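   /* The blit vertex shader consumes 20 bytes of push constants. Judging by
    * the blit shader code later in this file, this presumably holds the
    * normalized blit box (4 floats) plus a Z coordinate for 3D blits, but
    * the exact layout is owned by the shader generation code.
    */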
   VkPipelineLayoutCreateInfo pipeline_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &pipeline_layout_info,
                                &device->vk.alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}

void
v3dv_meta_blit_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.blit.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_blit_key_hash,
                                 meta_blit_key_compare);
   }

   create_blit_pipeline_layout(device,
                               &device->meta.blit.ds_layout,
                               &device->meta.blit.p_layout);
}

void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
                                      &device->vk.alloc);
   }
}

static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
}

static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
{
   VkResult result;

   if (*ds_layout == 0) {
      VkDescriptorSetLayoutBinding ds_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &ds_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &ds_layout_info,
                                        &device->vk.alloc,
                                        ds_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*p_layout == 0);
   /* FIXME: this is abusing the API a bit, since not all of our copy
    * pipelines have a geometry shader. We could create 2 different pipeline
    * layouts, but this works for us for now.
    */
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
   VkPushConstantRange ranges[2] = {
      { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
      { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
   };

   VkPipelineLayoutCreateInfo p_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = ds_layout,
      .pushConstantRangeCount = 2,
      .pPushConstantRanges = ranges,
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &p_layout_info,
                                &device->vk.alloc,
                                p_layout);
   return result == VK_SUCCESS;
}

void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.texel_buffer_copy.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_texel_buffer_copy_key_hash,
                                 meta_texel_buffer_copy_key_compare);
   }

   create_texel_buffer_copy_pipeline_layout(
      device,
      &device->meta.texel_buffer_copy.ds_layout,
      &device->meta.texel_buffer_copy.p_layout);
}

void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
                                      &device->vk.alloc);
   }
}

static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

   /* We can't render to compressed formats using the TLB, so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions of the compressed image by
    * the compressed block size.
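    *
    * For example, ETC2_R8G8B8A8 uses 128-bit 4x4 blocks, so each block maps
    * to exactly one R32G32B32A32_UINT texel, and a copy of a 64x64 region of
    * compressed texels becomes a 16x16 copy in the compatible format.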
    */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
      return VK_FORMAT_R32G32B32A32_UINT;

   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
      return VK_FORMAT_R16G16B16A16_UINT;

   default:
      return VK_FORMAT_UNDEFINED;
   }
}

/**
 * Checks if we can implement an image copy or clear operation using the TLB
 * hardware.
 */
bool
v3dv_meta_can_use_tlb(struct v3dv_image *image,
                      const VkOffset3D *offset,
                      VkFormat *compat_format)
{
   if (offset->x != 0 || offset->y != 0)
      return false;

   if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
      if (compat_format)
         *compat_format = image->vk.format;
      return true;
   }

   /* If the image format is not TLB-supported, then check if we can use
    * a compatible format instead.
    */
   if (compat_format) {
      *compat_format = get_compatible_tlb_format(image->vk.format);
      if (*compat_format != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
}

/* Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

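   /* The usual TLB job sequence: start a CL job sized to the copy region,
    * describe the TLB layout with a meta framebuffer, then emit a binning
    * flush and an RCL that stores the tiles out to the buffer.
    */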
   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
      (job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *region,
            VkFilter filter,
            bool dst_is_padded_image);

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy2KHR *region)
{
   bool handled = false;

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->cpp;

   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer, and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil
    * images: since we are copying only one aspect of the image, we need to
    * set up our formats, color write mask and source swizzle mask to match
    * that.
    */
   VkFormat dst_format;
   VkFormat src_format;
   VkColorComponentFlags cmask = 0; /* All components */
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
                image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24 bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't want
             * to write bits 24-31 in the destination.
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
         return handled;
      };
      break;
   case 2:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
      return handled;
   };

   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Obtain the 2D buffer region spec */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

   /* If the image is compressed, the bpp refers to blocks, not pixels */
   uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
   uint32_t block_height = vk_format_get_blockheight(image->vk.format);
   buf_width = buf_width / block_width;
   buf_height = buf_height / block_height;

   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* Our blit interface can see the real format of the images to detect
    * copies between compressed and uncompressed images and adapt the
    * blit region accordingly. Here we are just doing a raw copy of
    * compressed data, but we are passing an uncompressed view of the
    * buffer for the blit destination image (since compressed formats are
    * not renderable), so we also want to provide an uncompressed view of
    * the source image.
    */
   VkResult result;
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   if (vk_format_is_compressed(image->vk.format)) {
      VkImage uiview;
      VkImageCreateInfo uiview_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_3D,
         .format = dst_format,
         .extent = { buf_width, buf_height, image->vk.extent.depth },
         .mipLevels = image->vk.mip_levels,
         .arrayLayers = image->vk.array_layers,
         .samples = image->vk.samples,
         .tiling = image->vk.tiling,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };
      result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)uiview,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      result =
         vk_common_BindImageMemory(_device, uiview,
                                   v3dv_device_memory_to_handle(image->mem),
                                   image->mem_offset);
      if (result != VK_SUCCESS)
         return handled;

      image = v3dv_image_from_handle(uiview);
   }

   /* Copy requested layers */
   for (uint32_t i = 0; i < num_layers; i++) {
      /* Create the destination blit image from the destination buffer */
      VkImageCreateInfo image_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = dst_format,
         .extent = { buf_width, buf_height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      VkImage buffer_image;
      result =
         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)buffer_image,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      /* Bind the buffer memory to the image */
      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
                                   i * buf_width * buf_height * buffer_bpp;
      result =
         vk_common_BindImageMemory(_device, buffer_image,
                                   v3dv_device_memory_to_handle(buffer->mem),
                                   buffer_offset);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      const VkImageBlit2KHR blit_region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
         .srcSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = region->imageSubresource.mipLevel,
            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
            .layerCount = 1,
         },
         .srcOffsets = {
            {
               DIV_ROUND_UP(region->imageOffset.x, block_width),
               DIV_ROUND_UP(region->imageOffset.y, block_height),
               region->imageOffset.z + i,
            },
            {
               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
                            block_width),
               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
                            block_height),
               region->imageOffset.z + i + 1,
            },
         },
         .dstSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
            {
               DIV_ROUND_UP(region->imageExtent.width, block_width),
               DIV_ROUND_UP(region->imageExtent.height, block_height),
               1
            },
         },
      };

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image), dst_format,
                            image, src_format,
                            cmask, &cswizzle,
                            &blit_region, VK_FILTER_NEAREST, false);
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   assert(handled);
   return true;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyImageToBufferInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);

   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

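   /* Try the TLB path first, which is generally cheaper; the blit path is
    * more general (it handles non-zero offsets) but needs a render job per
    * layer.
    */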
   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      unreachable("Unsupported image to buffer copy.");
   }
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   /* Destination can't be raster format */
   if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can only do full copies, so if the format is D24S8 both aspects need
    * to be copied. We only need to check the dst format because the spec
    * states that depth/stencil formats must match exactly.
    */
   if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
      const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;
      if (region->dstSubresource.aspectMask != ds_aspects)
         return false;
   }

   /* Don't handle copies between uncompressed and compressed formats for now.
    *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that makes such copies with full images (which we
    * require here), only partial copies. Also, in that case the code below that
    * checks for "dst image complete" requires some changes, since it is
    * checking against the region dimensions, which are in units of the source
    * image format.
    */
   if (vk_format_is_compressed(dst->vk.format) !=
       vk_format_is_compressed(src->vk.format)) {
      return false;
   }

   /* Source region must start at (0,0) */
   if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
      return false;

   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
   uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
   if (region->extent.width != dst_width || region->extent.height != dst_height)
      return false;

   /* From vkCmdCopyImage:
    *
    *   "When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination."
    */
   const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
   uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   /* Account for sample count */
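   /* (4x MSAA is stored as a 2x2 grid of samples per pixel, so the effective
    * dimensions double in each axis; this matches the doubling below.)
    */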
   assert(dst->vk.samples == src->vk.samples);
   if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
      assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
      width *= 2;
      height *= 2;
   }

   /* The TFU unit doesn't handle format conversions so we need the formats to
    * match. On the other hand, vkCmdCopyImage allows different color formats
    * on the source and destination images, but only if they are texel
    * compatible. For us, this means that we can effectively ignore different
    * formats and just make the copy using either of them, since we are just
    * moving raw data and not making any conversions.
    *
    * Also, the formats supported by the TFU unit are limited, but again, since
    * we are only doing raw copies here without interpreting or converting
    * the underlying pixel data according to its format, we can always choose
    * to use compatible formats that are supported with the TFU unit.
    */
   assert(dst->cpp == src->cpp);
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
                                region->dstSubresource.layerCount :
                                region->extent.depth;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;

   const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer : region->srcOffset.z;
   const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer : region->dstOffset.z;
   for (uint32_t i = 0; i < layer_count; i++) {
      v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
         (cmd_buffer, dst, dst_mip_level, base_dst_layer + i,
          src, src_mip_level, base_src_layer + i,
          width, height, format);
   }

   return true;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
       !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
      return false;
   }

   /* From the Vulkan spec, VkImageCopy valid usage:
    *
    *   "If neither the calling command’s srcImage nor the calling command’s
    *    dstImage has a multi-planar image format then the aspectMask member
    *    of srcSubresource and dstSubresource must match."
    */
   assert(region->dstSubresource.aspectMask ==
          region->srcSubresource.aspectMask);
   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->dstSubresource.aspectMask,
       &internal_type, &internal_bpp);

   /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
    *
    *   "The number of slices of the extent (for 3D) or layers of the
    *    srcSubresource (for non-3D) must match the number of slices of the
    *    extent (for 3D) or layers of the dstSubresource (for non-3D)."
    */
   assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->srcSubresource.layerCount : region->extent.depth) ==
          (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
           region->dstSubresource.layerCount : region->extent.depth));
   uint32_t num_layers;
   if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed image using compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp,
                        src->vk.samples > VK_SAMPLE_COUNT_1_BIT);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

/**
 * Takes the image provided as argument and creates a new image that has
 * the same specification and aliases the same memory storage, except that:
 *
 *   - It has the uncompressed format passed in.
 *   - Its original width/height are scaled by the factors passed in.
 *
 * This is useful to implement copies from compressed images using the blit
 * path. The idea is that we create uncompressed "image views" of both the
 * source and destination images using the uncompressed format and then we
 * define the copy blit in terms of that format.
 */
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *src,
                   float width_scale,
                   float height_scale,
                   VkFormat format)
{
   assert(!vk_format_is_compressed(format));

   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);

   VkImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = src->vk.image_type,
      .format = format,
      .extent = {
         .width = src->vk.extent.width * width_scale,
         .height = src->vk.extent.height * height_scale,
         .depth = src->vk.extent.depth,
      },
      .mipLevels = src->vk.mip_levels,
      .arrayLayers = src->vk.array_layers,
      .samples = src->vk.samples,
      .tiling = src->vk.tiling,
      .usage = src->vk.usage,
   };

   VkImage _image;
   VkResult result =
      v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
   if (result != VK_SUCCESS) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   struct v3dv_image *image = v3dv_image_from_handle(_image);
   image->mem = src->mem;
   image->mem_offset = src->mem_offset;
   return image;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *dst,
                struct v3dv_image *src,
                const VkImageCopy2KHR *region)
{
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
   const float block_scale_w = (float)src_block_w / (float)dst_block_w;
   const float block_scale_h = (float)src_block_h / (float)dst_block_h;

   /* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are no format conversions going on. Since we
    * are going to blit, we need to make sure that the selected format can be
    * both rendered to and textured from.
    */
   VkFormat format;
   float src_scale_w = 1.0f;
   float src_scale_h = 1.0f;
   float dst_scale_w = block_scale_w;
   float dst_scale_h = block_scale_h;
   if (vk_format_is_compressed(src->vk.format)) {
      /* If we are copying from a compressed format we should be aware that we
       * are going to texture from the source image, and the texture setup
       * knows the actual size of the image, so we need to choose a format
       * that has a per-texel (not per-block) bpp that is compatible for that
       * image size. For example, for a source image with size Bw*WxBh*H
       * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
       * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
       * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
       * so we could specify a blit with size Bw*WxBh*H and a format with
       * a bpp of 8-bit per texel (R8_UINT).
       *
       * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
       * which is 64-bit per block, then we would need a 4-bit format, which
       * we don't have, so instead we still choose an 8-bit format, but we
       * apply a divisor to the row dimensions of the blit, since we are
       * copying two texels per item.
       *
       * Generally, we can choose any format so long as we compute appropriate
       * divisors for the width and height depending on the source image's
       * bpp.
       */
      assert(src->cpp == dst->cpp);

      format = VK_FORMAT_R32G32_UINT;
      switch (src->cpp) {
      case 16:
         format = VK_FORMAT_R32G32B32A32_UINT;
         break;
      case 8:
         format = VK_FORMAT_R16G16B16A16_UINT;
         break;
      default:
         unreachable("Unsupported compressed format");
      }

      /* Create image views of the src/dst images that we can interpret in
       * terms of the canonical format.
       */
      src_scale_w /= src_block_w;
      src_scale_h /= src_block_h;
      dst_scale_w /= src_block_w;
      dst_scale_h /= src_block_h;

      src = create_image_alias(cmd_buffer, src,
                               src_scale_w, src_scale_h, format);

      dst = create_image_alias(cmd_buffer, dst,
                               dst_scale_w, dst_scale_h, format);
   } else {
      format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
         src->vk.format : get_compatible_tlb_format(src->vk.format);
      if (format == VK_FORMAT_UNDEFINED)
         return false;

      const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
      if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
         return false;
   }

   /* Given an uncompressed image with size WxH, if we copy it to a compressed
    * image, it will result in an image with size W*bWxH*bH, where bW and bH
    * are the compressed format's block width and height. This means that
    * copies between compressed and uncompressed images involve different
    * image sizes, and therefore, we need to take that into account when
    * setting up the source and destination blit regions below, so they are
    * consistent from the point of view of the single compatible format
    * selected for the copy.
    *
    * We should take into account that the dimensions of the region provided
    * to the copy command are specified in terms of the source image. With that
    * in mind, below we adjust the blit destination region to be consistent with
    * the source region for the compatible format, so basically, we apply
    * the block scale factor to the destination offset provided by the copy
    * command (because it is specified in terms of the destination image, not
    * the source), and then we just add the region copy dimensions to that
    * (since the region dimensions are already specified in terms of the source
    * image).
    */
   const VkOffset3D src_start = {
      region->srcOffset.x * src_scale_w,
      region->srcOffset.y * src_scale_h,
      region->srcOffset.z,
   };
   const VkOffset3D src_end = {
      src_start.x + region->extent.width * src_scale_w,
      src_start.y + region->extent.height * src_scale_h,
      src_start.z + region->extent.depth,
   };

   const VkOffset3D dst_start = {
      region->dstOffset.x * dst_scale_w,
      region->dstOffset.y * dst_scale_h,
      region->dstOffset.z,
   };
   const VkOffset3D dst_end = {
      dst_start.x + region->extent.width * src_scale_w,
      dst_start.y + region->extent.height * src_scale_h,
      dst_start.z + region->extent.depth,
   };

   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = { src_start, src_end },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = { dst_start, dst_end },
   };
   bool handled = blit_shader(cmd_buffer,
                              dst, format,
                              src, format,
                              0, NULL,
                              &blit_region, VK_FILTER_NEAREST, true);

   /* We should have selected formats that we can blit */
   assert(handled);
   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                      const VkCopyImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   assert(src->vk.samples == dst->vk.samples);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Image copy not supported");
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                       const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
      v3dv_X(cmd_buffer->device, meta_copy_buffer)
         (cmd_buffer,
          dst_buffer->mem->bo, dst_buffer->mem_offset,
          src_buffer->mem->bo, src_buffer->mem_offset,
          &pCopyBufferInfo->pRegions[i]);
   }
}

static void
destroy_update_buffer_cb(VkDevice _device,
                         uint64_t pobj,
                         VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
   v3dv_bo_free(device, bo);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                     VkBuffer dstBuffer,
                     VkDeviceSize dstOffset,
                     VkDeviceSize dataSize,
                     const void *pData)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

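   /* vkCmdUpdateBuffer data must be consumed at record time, so stage it in
    * a fresh BO, emit a GPU copy from there into the destination buffer, and
    * attach the BO to the command buffer so it lives for as long as the
    * command buffer does.
    */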
   struct v3dv_bo *src_bo =
      v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
   if (!src_bo) {
      fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
      return;
   }

   bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
   if (!ok) {
      fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
      return;
   }

   memcpy(src_bo->map, pData, dataSize);

   v3dv_bo_unmap(cmd_buffer->device, src_bo);

   VkBufferCopy2KHR region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
      .srcOffset = 0,
      .dstOffset = dstOffset,
      .size = dataSize,
   };
   struct v3dv_job *copy_job =
      v3dv_X(cmd_buffer->device, meta_copy_buffer)
      (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
       src_bo, 0, &region);

   if (!copy_job)
      return;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize size,
                   uint32_t data)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *bo = dst_buffer->mem->bo;

   /* From the Vulkan spec:
    *
    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
    *    a multiple of 4, then the nearest smaller multiple is used."
    */
   if (size == VK_WHOLE_SIZE) {
      size = dst_buffer->size - dstOffset;
      size -= size % 4;
   }

   v3dv_X(cmd_buffer->device, meta_fill_buffer)
      (cmd_buffer, bo, dstOffset, size, data);
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);

   /* Destination can't be raster format */
   if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can't copy D24S8 because buffer to image copies only copy one aspect
    * at a time, and the TFU copies full images. Also, V3D keeps the depth bits
    * for both D24S8 and D24X8 in the 24-bit MSB of each 32-bit word, but the
    * Vulkan spec has the buffer data specified the other way around, so it is
    * not a straight copy, we would have to swizzle the channels, which the
    * TFU can't do.
    */
   if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
       image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      return false;
   }

   /* Region must include full slice */
   const uint32_t offset_x = region->imageOffset.x;
   const uint32_t offset_y = region->imageOffset.y;
   if (offset_x != 0 || offset_y != 0)
      return false;

   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   if (width != image->vk.extent.width || height != image->vk.extent.height)
      return false;

   /* Handle region semantics for compressed images */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   width = DIV_ROUND_UP(width, block_w);
   height = DIV_ROUND_UP(height, block_h);

   /* Format must be supported for texturing via the TFU. Since we are just
    * copying raw data and not converting between pixel formats, we can ignore
    * the image's format and choose a compatible TFU format for the image
    * texel size instead, which expands the list of formats we can handle here.
    */
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     image->cpp, NULL);

   const uint32_t mip_level = region->imageSubresource.mipLevel;
   const struct v3d_resource_slice *slice = &image->slices[mip_level];

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   assert(image->mem && image->mem->bo);
   const struct v3dv_bo *dst_bo = image->mem->bo;

   assert(buffer->mem && buffer->mem->bo);
   const struct v3dv_bo *src_bo = buffer->mem->bo;

   /* Emit a TFU job per layer to copy */
   const uint32_t buffer_stride = width * image->cpp;
   for (int i = 0; i < num_layers; i++) {
      uint32_t layer;
      if (image->vk.image_type != VK_IMAGE_TYPE_3D)
         layer = region->imageSubresource.baseArrayLayer + i;
      else
         layer = region->imageOffset.z + i;

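      /* ios packs the transfer dimensions: height in the top 16 bits and
       * width in the bottom 16.
       */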
      struct drm_v3d_submit_tfu tfu = {
         .ios = (height << 16) | width,
         .bo_handles = {
            dst_bo->handle,
            src_bo->handle != dst_bo->handle ? src_bo->handle : 0
         },
      };

      const uint32_t buffer_offset =
         buffer->mem_offset + region->bufferOffset +
         height * buffer_stride * i;

      const uint32_t src_offset = src_bo->offset + buffer_offset;
      tfu.iia |= src_offset;
      tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;
      tfu.iis |= width;

      const uint32_t dst_offset =
         dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
      tfu.ioa |= dst_offset;

      tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
                  (slice->tiling - V3D_TILING_LINEARTILE)) <<
                 V3D_TFU_IOA_FORMAT_SHIFT;
      tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;

      /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
       * OPAD field for the destination (how many extra UIF blocks beyond
       * those necessary to cover the height).
       */
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
         uint32_t implicit_padded_height = align(height, uif_block_h);
         uint32_t icfg =
            (slice->padded_height - implicit_padded_height) / uif_block_h;
         tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
      }

      v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
   }

   return true;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, false,
                        1, internal_bpp, false);

   struct v3dv_meta_framebuffer framebuffer;
   v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                              internal_type, &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
      (job, image, buffer, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

static bool
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_image *image,
                               struct v3dv_buffer *buffer,
                               const VkBufferImageCopy2KHR *region)
{
   if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
      return true;
   if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
      return true;
   return false;
}

static VkResult
create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
{
   /* If this is not the first pool we create for this command buffer,
    * size it based on the size of the currently exhausted pool.
    */
1458 uint32_t descriptor_count = 64;
1459 if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
1460 struct v3dv_descriptor_pool *exhausted_pool =
1461 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
1462 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
1463 }
1464
1465 /* Create the descriptor pool */
1466 cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
1467 VkDescriptorPoolSize pool_size = {
1468 .type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1469 .descriptorCount = descriptor_count,
1470 };
1471 VkDescriptorPoolCreateInfo info = {
1472 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
1473 .maxSets = descriptor_count,
1474 .poolSizeCount = 1,
1475 .pPoolSizes = &pool_size,
1476 .flags = 0,
1477 };
1478 VkResult result =
1479 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
1480 &info,
1481 &cmd_buffer->device->vk.alloc,
1482 &cmd_buffer->meta.texel_buffer_copy.dspool);
1483
1484 if (result == VK_SUCCESS) {
1485 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
1486 const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
1487
1488 v3dv_cmd_buffer_add_private_obj(
1489 cmd_buffer, (uintptr_t) _pool,
1490 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
1491
1492 struct v3dv_descriptor_pool *pool =
1493 v3dv_descriptor_pool_from_handle(_pool);
1494 pool->is_driver_internal = true;
1495 }
1496
1497 return result;
1498 }
1499
1500 static VkResult
allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer * cmd_buffer,VkDescriptorSet * set)1501 allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
1502 VkDescriptorSet *set)
1503 {
1504 /* Make sure we have a descriptor pool */
1505 VkResult result;
1506 if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
1507 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
1508 if (result != VK_SUCCESS)
1509 return result;
1510 }
1511 assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
1512
1513 /* Allocate descriptor set */
1514 struct v3dv_device *device = cmd_buffer->device;
1515 VkDevice _device = v3dv_device_to_handle(device);
1516 VkDescriptorSetAllocateInfo info = {
1517 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
1518 .descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
1519 .descriptorSetCount = 1,
1520 .pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
1521 };
1522 result = v3dv_AllocateDescriptorSets(_device, &info, set);
1523
1524 /* If we ran out of pool space, grow the pool and try again */
1525 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
1526 result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
1527 if (result == VK_SUCCESS) {
1528 info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
1529 result = v3dv_AllocateDescriptorSets(_device, &info, set);
1530 }
1531 }
1532
1533 return result;
1534 }
1535
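/* Builds the texel buffer copy pipeline cache key. The layout below packs,
 * in order: the VkFormat (4B), the color write mask (4B), the is_layered
 * flag (4B, of which only one bit is used) and the VkComponentMapping
 * (16B). This assumes V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE is 28
 * bytes, which the assert at the end of the function verifies.
 */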
1536 static void
1537 get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
1538 VkColorComponentFlags cmask,
1539 VkComponentMapping *cswizzle,
1540 bool is_layered,
1541 uint8_t *key)
1542 {
1543 memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1544
1545 uint32_t *p = (uint32_t *) key;
1546
1547 *p = format;
1548 p++;
1549
1550 *p = cmask;
1551 p++;
1552
1553 /* Note that we are using a single byte for this, so we could pack
1554 * more data into this 32-bit slot in the future.
1555 */
1556 *p = is_layered ? 1 : 0;
1557 p++;
1558
1559 memcpy(p, cswizzle, sizeof(VkComponentMapping));
1560 p += sizeof(VkComponentMapping) / sizeof(uint32_t);
1561
1562 assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1563 }
1564
1565 static bool
1566 create_blit_render_pass(struct v3dv_device *device,
1567 VkFormat dst_format,
1568 VkFormat src_format,
1569 VkRenderPass *pass_load,
1570 VkRenderPass *pass_no_load);
1571
1572 static nir_ssa_def *gen_rect_vertices(nir_builder *b);
1573
1574 static bool
1575 create_pipeline(struct v3dv_device *device,
1576 struct v3dv_render_pass *pass,
1577 struct nir_shader *vs_nir,
1578 struct nir_shader *gs_nir,
1579 struct nir_shader *fs_nir,
1580 const VkPipelineVertexInputStateCreateInfo *vi_state,
1581 const VkPipelineDepthStencilStateCreateInfo *ds_state,
1582 const VkPipelineColorBlendStateCreateInfo *cb_state,
1583 const VkPipelineMultisampleStateCreateInfo *ms_state,
1584 const VkPipelineLayout layout,
1585 VkPipeline *pipeline);
1586
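/* Builds a vertex shader that synthesizes a full-screen quad from the
 * vertex index alone (see gen_rect_vertices()), so the copy pipeline needs
 * no vertex buffers: it declares an empty vertex input state and is driven
 * by a 4-vertex draw call.
 */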
1587 static nir_shader *
1588 get_texel_buffer_copy_vs()
1589 {
1590 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1591 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
1592 "meta texel buffer copy vs");
1593 nir_variable *vs_out_pos =
1594 nir_variable_create(b.shader, nir_var_shader_out,
1595 glsl_vec4_type(), "gl_Position");
1596 vs_out_pos->data.location = VARYING_SLOT_POS;
1597
1598 nir_ssa_def *pos = gen_rect_vertices(&b);
1599 nir_store_var(&b, vs_out_pos, pos, 0xf);
1600
1601 return b.shader;
1602 }
1603
1604 static nir_shader *
1605 get_texel_buffer_copy_gs()
1606 {
1607 /* FIXME: this creates a geometry shader that takes the index of a single
1608 * layer to clear from push constants, so we need to emit a draw call for
1609 * each layer that we want to clear. We could actually do better and have it
1610 * take a range of layers; however, if we were to do this, we would need to
1611 * be careful not to exceed the maximum number of output vertices allowed in
1612 * a geometry shader.
1613 */
1614 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1615 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
1616 "meta texel buffer copy gs");
1617 nir_shader *nir = b.shader;
1618 nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
1619 nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
1620 (1ull << VARYING_SLOT_LAYER);
1621 nir->info.gs.input_primitive = GL_TRIANGLES;
1622 nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
1623 nir->info.gs.vertices_in = 3;
1624 nir->info.gs.vertices_out = 3;
1625 nir->info.gs.invocations = 1;
1626 nir->info.gs.active_stream_mask = 0x1;
1627
1628 /* in vec4 gl_Position[3] */
1629 nir_variable *gs_in_pos =
1630 nir_variable_create(b.shader, nir_var_shader_in,
1631 glsl_array_type(glsl_vec4_type(), 3, 0),
1632 "in_gl_Position");
1633 gs_in_pos->data.location = VARYING_SLOT_POS;
1634
1635 /* out vec4 gl_Position */
1636 nir_variable *gs_out_pos =
1637 nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
1638 "out_gl_Position");
1639 gs_out_pos->data.location = VARYING_SLOT_POS;
1640
1641 /* out float gl_Layer */
1642 nir_variable *gs_out_layer =
1643 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
1644 "out_gl_Layer");
1645 gs_out_layer->data.location = VARYING_SLOT_LAYER;
1646
1647 /* Emit output triangle */
1648 for (uint32_t i = 0; i < 3; i++) {
1649 /* gl_Position from shader input */
1650 nir_deref_instr *in_pos_i =
1651 nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
1652 nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
1653
1654 /* gl_Layer from push constants */
1655 nir_ssa_def *layer =
1656 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1657 .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
1658 .range = 4);
1659 nir_store_var(&b, gs_out_layer, layer, 0x1);
1660
1661 nir_emit_vertex(&b, 0);
1662 }
1663
1664 nir_end_primitive(&b, 0);
1665
1666 return nir;
1667 }
1668
1669 static nir_ssa_def *
1670 load_frag_coord(nir_builder *b)
1671 {
1672 nir_foreach_shader_in_variable(var, b->shader) {
1673 if (var->data.location == VARYING_SLOT_POS)
1674 return nir_load_var(b, var);
1675 }
1676 nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
1677 glsl_vec4_type(), NULL);
1678 pos->data.location = VARYING_SLOT_POS;
1679 return nir_load_var(b, pos);
1680 }
1681
1682 static uint32_t
1683 component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
1684 {
1685 if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
1686 swz = comp;
1687
1688 switch (swz) {
1689 case VK_COMPONENT_SWIZZLE_R:
1690 return 0;
1691 case VK_COMPONENT_SWIZZLE_G:
1692 return 1;
1693 case VK_COMPONENT_SWIZZLE_B:
1694 return 2;
1695 case VK_COMPONENT_SWIZZLE_A:
1696 return 3;
1697 default:
1698 unreachable("Invalid swizzle");
1699 };
1700 }
1701
1702 static nir_shader *
1703 get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
1704 VkComponentMapping *cswizzle)
1705 {
1706 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1707 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
1708 "meta texel buffer copy fs");
1709
1710 /* We only use the copy from texel buffer shader to implement
1711 * copy_buffer_to_image_shader, which always selects a compatible integer
1712 * format for the copy.
1713 */
1714 assert(vk_format_is_int(format));
1715
1716 /* Fragment shader output color */
1717 nir_variable *fs_out_color =
1718 nir_variable_create(b.shader, nir_var_shader_out,
1719 glsl_uvec4_type(), "out_color");
1720 fs_out_color->data.location = FRAG_RESULT_DATA0;
1721
1722 /* Texel buffer input */
1723 const struct glsl_type *sampler_type =
1724 glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
1725 nir_variable *sampler =
1726 nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
1727 sampler->data.descriptor_set = 0;
1728 sampler->data.binding = 0;
1729
1730 /* Load the box describing the pixel region we want to copy from the
1731 * texel buffer.
1732 */
1733 nir_ssa_def *box =
1734 nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
1735 .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
1736 .range = 16);
1737
1738 /* Load the buffer stride (this comes in texel units) */
1739 nir_ssa_def *stride =
1740 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1741 .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
1742 .range = 4);
1743
1744 /* Load the buffer offset (this comes in texel units) */
1745 nir_ssa_def *offset =
1746 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1747 .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
1748 .range = 4);
1749
1750 nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
1751
1752 /* Load pixel data from texel buffer based on the x,y offset of the pixel
1753 * within the box. Texel buffers are 1D arrays of texels.
1754 *
1755 * Notice that we already make sure that we only generate fragments that are
1756 * inside the box through the scissor/viewport state, so our offset into the
1757 * texel buffer should always be within its bounds and we don't need
1758 * to add a check for that here.
1759 */
1760 nir_ssa_def *x_offset =
1761 nir_isub(&b, nir_channel(&b, coord, 0),
1762 nir_channel(&b, box, 0));
1763 nir_ssa_def *y_offset =
1764 nir_isub(&b, nir_channel(&b, coord, 1),
1765 nir_channel(&b, box, 1));
1766 nir_ssa_def *texel_offset =
1767 nir_iadd(&b, nir_iadd(&b, offset, x_offset),
1768 nir_imul(&b, y_offset, stride));
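/* Worked example (values are illustrative): with a box starting at
 * (8, 8), a stride of 256 texels and a base offset of 0, the fragment
 * at (10, 9) fetches texel 0 + (10 - 8) + (9 - 8) * 256 = 258.
 */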
1769
1770 nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
1771 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
1772 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
1773 tex->op = nir_texop_txf;
1774 tex->src[0].src_type = nir_tex_src_coord;
1775 tex->src[0].src = nir_src_for_ssa(texel_offset);
1776 tex->src[1].src_type = nir_tex_src_texture_deref;
1777 tex->src[1].src = nir_src_for_ssa(tex_deref);
1778 tex->dest_type = nir_type_uint32;
1779 tex->is_array = false;
1780 tex->coord_components = 1;
1781 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
1782 nir_builder_instr_insert(&b, &tex->instr);
1783
1784 uint32_t swiz[4];
1785 swiz[0] =
1786 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
1787 swiz[1] =
1788 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
1789 swiz[2] =
1790 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
1791 swiz[3] =
1792 component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
1793 nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
1794 nir_store_var(&b, fs_out_color, s, 0xf);
1795
1796 return b.shader;
1797 }
1798
1799 static bool
1800 create_texel_buffer_copy_pipeline(struct v3dv_device *device,
1801 VkFormat format,
1802 VkColorComponentFlags cmask,
1803 VkComponentMapping *cswizzle,
1804 bool is_layered,
1805 VkRenderPass _pass,
1806 VkPipelineLayout pipeline_layout,
1807 VkPipeline *pipeline)
1808 {
1809 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
1810
1811 assert(vk_format_is_color(format));
1812
1813 nir_shader *vs_nir = get_texel_buffer_copy_vs();
1814 nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
1815 nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
1816
1817 const VkPipelineVertexInputStateCreateInfo vi_state = {
1818 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
1819 .vertexBindingDescriptionCount = 0,
1820 .vertexAttributeDescriptionCount = 0,
1821 };
1822
1823 VkPipelineDepthStencilStateCreateInfo ds_state = {
1824 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
1825 };
1826
1827 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
1828 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
1829 .blendEnable = false,
1830 .colorWriteMask = cmask,
1831 };
1832
1833 const VkPipelineColorBlendStateCreateInfo cb_state = {
1834 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
1835 .logicOpEnable = false,
1836 .attachmentCount = 1,
1837 .pAttachments = blend_att_state
1838 };
1839
1840 const VkPipelineMultisampleStateCreateInfo ms_state = {
1841 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
1842 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
1843 .sampleShadingEnable = false,
1844 .pSampleMask = NULL,
1845 .alphaToCoverageEnable = false,
1846 .alphaToOneEnable = false,
1847 };
1848
1849 return create_pipeline(device,
1850 pass,
1851 vs_nir, gs_nir, fs_nir,
1852 &vi_state,
1853 &ds_state,
1854 &cb_state,
1855 &ms_state,
1856 pipeline_layout,
1857 pipeline);
1858 }
1859
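/* Returns (in *pipeline) a cached texel buffer copy pipeline matching the
 * given parameters, creating it on a cache miss. The per-device cache is
 * keyed by the key built in get_texel_buffer_copy_pipeline_cache_key() and
 * guarded by the meta mutex, which stays held across pipeline creation so
 * concurrent command buffers don't race to build the same entry.
 */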
1860 static bool
1861 get_copy_texel_buffer_pipeline(
1862 struct v3dv_device *device,
1863 VkFormat format,
1864 VkColorComponentFlags cmask,
1865 VkComponentMapping *cswizzle,
1866 VkImageType image_type,
1867 bool is_layered,
1868 struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
1869 {
1870 bool ok = true;
1871
1872 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
1873 get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
1874 key);
1875
1876 mtx_lock(&device->meta.mtx);
1877 struct hash_entry *entry =
1878 _mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
1879 &key);
1880 if (entry) {
1881 mtx_unlock(&device->meta.mtx);
1882 *pipeline = entry->data;
1883 return true;
1884 }
1885
1886 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
1887 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1888
1889 if (*pipeline == NULL)
1890 goto fail;
1891
1892 /* A render pass created for blits with this format is compatible */
1893 ok = create_blit_render_pass(device, format, format,
1894 &(*pipeline)->pass,
1895 &(*pipeline)->pass_no_load);
1896 if (!ok)
1897 goto fail;
1898
1899 ok =
1900 create_texel_buffer_copy_pipeline(device,
1901 format, cmask, cswizzle, is_layered,
1902 (*pipeline)->pass,
1903 device->meta.texel_buffer_copy.p_layout,
1904 &(*pipeline)->pipeline);
1905 if (!ok)
1906 goto fail;
1907
1908 _mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
1909 &key, *pipeline);
1910
1911 mtx_unlock(&device->meta.mtx);
1912 return true;
1913
1914 fail:
1915 mtx_unlock(&device->meta.mtx);
1916
1917 VkDevice _device = v3dv_device_to_handle(device);
1918 if (*pipeline) {
1919 if ((*pipeline)->pass)
1920 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
1921 if ((*pipeline)->pipeline)
1922 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
1923 vk_free(&device->vk.alloc, *pipeline);
1924 *pipeline = NULL;
1925 }
1926
1927 return false;
1928 }
1929
1930 static bool
1931 texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
1932 VkImageAspectFlags aspect,
1933 struct v3dv_image *image,
1934 VkFormat dst_format,
1935 VkFormat src_format,
1936 struct v3dv_buffer *buffer,
1937 uint32_t buffer_bpp,
1938 VkColorComponentFlags cmask,
1939 VkComponentMapping *cswizzle,
1940 uint32_t region_count,
1941 const VkBufferImageCopy2KHR *regions)
1942 {
1943 VkResult result;
1944 bool handled = false;
1945
1946 assert(cswizzle);
1947
1948 /* This is a copy path, so we don't handle format conversions. The only
1949 * exception is stencil to D24S8 copies, which are handled as a color
1950 * masked R8->RGBA8 copy.
1951 */
1952 assert(src_format == dst_format ||
1953 (dst_format == VK_FORMAT_R8G8B8A8_UINT &&
1954 src_format == VK_FORMAT_R8_UINT &&
1955 cmask == VK_COLOR_COMPONENT_R_BIT));
1956
1957 /* We only handle color copies. Callers can copy D/S aspects by using
1958 * a compatible color format and maybe a cmask/cswizzle for D24 formats.
1959 */
1960 if (aspect != VK_IMAGE_ASPECT_COLOR_BIT)
1961 return handled;
1962
1963 /* FIXME: we only handle uncompressed images for now. */
1964 if (vk_format_is_compressed(image->vk.format))
1965 return handled;
1966
1967 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
1968 VK_COLOR_COMPONENT_G_BIT |
1969 VK_COLOR_COMPONENT_B_BIT |
1970 VK_COLOR_COMPONENT_A_BIT;
1971 if (cmask == 0)
1972 cmask = full_cmask;
1973
1974 /* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
1975 * so we can bind it as a texel buffer. Otherwise, the buffer view
1976 * we create below won't set up the texture state that we need for this.
1977 */
1978 if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
1979 if (v3dv_buffer_format_supports_features(
1980 cmd_buffer->device, src_format,
1981 VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
1982 buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
1983 } else {
1984 return handled;
1985 }
1986 }
1987
1988 /* At this point we should be able to handle the copy unless an unexpected
1989 * error occurs, such as an OOM.
1990 */
1991 handled = true;
1992
1993
1994 /* Compute the number of layers to copy.
1995 *
1996 * If we are batching (region_count > 1) all our regions have the same
1997 * image subresource so we can take this from the first region. For 3D
1998 * images we require the same depth extent.
1999 */
2000 const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2001 uint32_t num_layers;
2002 if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
2003 num_layers = resource->layerCount;
2004 } else {
2005 assert(region_count == 1);
2006 num_layers = regions[0].imageExtent.depth;
2007 }
2008 assert(num_layers > 0);
2009
2010 /* Get the texel buffer copy pipeline */
2011 struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2012 bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
2013 dst_format, cmask, cswizzle,
2014 image->vk.image_type, num_layers > 1,
2015 &pipeline);
2016 if (!ok)
2017 return handled;
2018 assert(pipeline && pipeline->pipeline && pipeline->pass);
2019
2020 /* Set up the descriptor set for the source texel buffer. We don't have to
2021 * register the descriptor as a private command buffer object since
2022 * all descriptors will be freed automatically with the descriptor
2023 * pool.
2024 */
2025 VkDescriptorSet set;
2026 result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2027 if (result != VK_SUCCESS)
2028 return handled;
2029
2030 /* FIXME: for some reason passing region->bufferOffset here for the
2031 * offset field doesn't work, making the following CTS tests fail:
2032 *
2033 * dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset*
2034 *
2035 * So instead we pass 0 here and we pass the offset in texels as a push
2036 * constant to the shader, which seems to work correctly.
2037 */
2038 VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2039 VkBufferViewCreateInfo buffer_view_info = {
2040 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2041 .buffer = v3dv_buffer_to_handle(buffer),
2042 .format = src_format,
2043 .offset = 0,
2044 .range = VK_WHOLE_SIZE,
2045 };
2046
2047 VkBufferView texel_buffer_view;
2048 result = v3dv_CreateBufferView(_device, &buffer_view_info,
2049 &cmd_buffer->device->vk.alloc,
2050 &texel_buffer_view);
2051 if (result != VK_SUCCESS)
2052 return handled;
2053
2054 v3dv_cmd_buffer_add_private_obj(
2055 cmd_buffer, (uintptr_t)texel_buffer_view,
2056 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2057
2058 VkWriteDescriptorSet write = {
2059 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2060 .dstSet = set,
2061 .dstBinding = 0,
2062 .dstArrayElement = 0,
2063 .descriptorCount = 1,
2064 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2065 .pTexelBufferView = &texel_buffer_view,
2066 };
2067 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2068
2069 /* Push command buffer state before starting meta operation */
2070 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2071 uint32_t dirty_dynamic_state = 0;
2072
2073 /* Bind common state for all layers and regions */
2074 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2075 v3dv_CmdBindPipeline(_cmd_buffer,
2076 VK_PIPELINE_BIND_POINT_GRAPHICS,
2077 pipeline->pipeline);
2078
2079 v3dv_CmdBindDescriptorSets(_cmd_buffer,
2080 VK_PIPELINE_BIND_POINT_GRAPHICS,
2081 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2082 0, 1, &set,
2083 0, NULL);
2084
2085 /* Set up the framebuffer.
2086 *
2087 * For 3D images, this creates a layered framebuffer with a number of
2088 * layers matching the depth extent of the 3D image.
2089 */
2090 uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
2091 uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
2092 VkImageViewCreateInfo image_view_info = {
2093 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2094 .image = v3dv_image_to_handle(image),
2095 .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
2096 .format = dst_format,
2097 .subresourceRange = {
2098 .aspectMask = aspect,
2099 .baseMipLevel = resource->mipLevel,
2100 .levelCount = 1,
2101 .baseArrayLayer = resource->baseArrayLayer,
2102 .layerCount = num_layers,
2103 },
2104 };
2105 VkImageView image_view;
2106 result = v3dv_CreateImageView(_device, &image_view_info,
2107 &cmd_buffer->device->vk.alloc, &image_view);
2108 if (result != VK_SUCCESS)
2109 goto fail;
2110
2111 v3dv_cmd_buffer_add_private_obj(
2112 cmd_buffer, (uintptr_t)image_view,
2113 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2114
2115 VkFramebufferCreateInfo fb_info = {
2116 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2117 .renderPass = pipeline->pass,
2118 .attachmentCount = 1,
2119 .pAttachments = &image_view,
2120 .width = fb_width,
2121 .height = fb_height,
2122 .layers = num_layers,
2123 };
2124
2125 VkFramebuffer fb;
2126 result = v3dv_CreateFramebuffer(_device, &fb_info,
2127 &cmd_buffer->device->vk.alloc, &fb);
2128 if (result != VK_SUCCESS)
2129 goto fail;
2130
2131 v3dv_cmd_buffer_add_private_obj(
2132 cmd_buffer, (uintptr_t)fb,
2133 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2134
2135 /* For each layer */
2136 for (uint32_t l = 0; l < num_layers; l++) {
2137 /* Start render pass for this layer.
2138 *
2139 * If we only have one region to copy, then we might be able to
2140 * skip the TLB load if it is aligned to tile boundaries. All layers
2141 * copy the same area, so we only need to check this once.
2142 */
2143 bool can_skip_tlb_load = false;
2144 VkRect2D render_area;
2145 if (region_count == 1) {
2146 render_area.offset.x = regions[0].imageOffset.x;
2147 render_area.offset.y = regions[0].imageOffset.y;
2148 render_area.extent.width = regions[0].imageExtent.width;
2149 render_area.extent.height = regions[0].imageExtent.height;
2150
2151 if (l == 0) {
2152 struct v3dv_render_pass *pipeline_pass =
2153 v3dv_render_pass_from_handle(pipeline->pass);
2154 can_skip_tlb_load =
2155 cmask == full_cmask &&
2156 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2157 v3dv_framebuffer_from_handle(fb),
2158 pipeline_pass, 0);
2159 }
2160 } else {
2161 render_area.offset.x = 0;
2162 render_area.offset.y = 0;
2163 render_area.extent.width = fb_width;
2164 render_area.extent.height = fb_height;
2165 }
2166
2167 VkRenderPassBeginInfo rp_info = {
2168 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2169 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2170 pipeline->pass,
2171 .framebuffer = fb,
2172 .renderArea = render_area,
2173 .clearValueCount = 0,
2174 };
2175
2176 v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
2177 struct v3dv_job *job = cmd_buffer->state.job;
2178 if (!job)
2179 goto fail;
2180
2181 /* If we are using a layered copy we need to specify the layer for the
2182 * Geometry Shader.
2183 */
2184 if (num_layers > 1) {
2185 uint32_t layer = resource->baseArrayLayer + l;
2186 v3dv_CmdPushConstants(_cmd_buffer,
2187 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2188 VK_SHADER_STAGE_GEOMETRY_BIT,
2189 24, 4, &layer);
2190 }
2191
2192 /* For each region */
2193 dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
2194 for (uint32_t r = 0; r < region_count; r++) {
2195 const VkBufferImageCopy2KHR *region = &regions[r];
2196
2197 /* Obtain the 2D buffer region spec */
2198 uint32_t buf_width, buf_height;
2199 if (region->bufferRowLength == 0)
2200 buf_width = region->imageExtent.width;
2201 else
2202 buf_width = region->bufferRowLength;
2203
2204 if (region->bufferImageHeight == 0)
2205 buf_height = region->imageExtent.height;
2206 else
2207 buf_height = region->bufferImageHeight;
2208
2209 const VkViewport viewport = {
2210 .x = region->imageOffset.x,
2211 .y = region->imageOffset.y,
2212 .width = region->imageExtent.width,
2213 .height = region->imageExtent.height,
2214 .minDepth = 0.0f,
2215 .maxDepth = 1.0f
2216 };
2217 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2218 const VkRect2D scissor = {
2219 .offset = { region->imageOffset.x, region->imageOffset.y },
2220 .extent = { region->imageExtent.width, region->imageExtent.height }
2221 };
2222 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2223
2224 const VkDeviceSize buf_offset =
2225 region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2226 uint32_t push_data[6] = {
2227 region->imageOffset.x,
2228 region->imageOffset.y,
2229 region->imageOffset.x + region->imageExtent.width - 1,
2230 region->imageOffset.y + region->imageExtent.height - 1,
2231 buf_width,
2232 buf_offset,
2233 };
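/* Push constant layout consumed by the shaders, assuming the
 * TEXEL_BUFFER_COPY_*_PC_OFFSET macros match the offsets used here:
 *
 * bytes  0-15: copy box (x0, y0, x1, y1), inclusive, read by the FS
 * bytes 16-19: buffer stride in texels, read by the FS
 * bytes 20-23: buffer offset in texels, read by the FS
 * bytes 24-27: layer index, read by the GS (pushed above for layered
 * copies)
 */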
2234
2235 v3dv_CmdPushConstants(_cmd_buffer,
2236 cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2237 VK_SHADER_STAGE_FRAGMENT_BIT,
2238 0, sizeof(push_data), &push_data);
2239
2240 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2241 } /* For each region */
2242
2243 v3dv_CmdEndRenderPass(_cmd_buffer);
2244 } /* For each layer */
2245
2246 fail:
2247 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
2248 return handled;
2249 }
2250
2251 /**
2252 * Returns true if the implementation supports the requested operation (even if
2253 * it failed to process it, for example, due to an out-of-memory error).
2254 */
2255 static bool
2256 copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2257 VkImageAspectFlags aspect,
2258 struct v3dv_image *image,
2259 VkFormat dst_format,
2260 VkFormat src_format,
2261 struct v3dv_buffer *buffer,
2262 uint32_t buffer_bpp,
2263 VkColorComponentFlags cmask,
2264 VkComponentMapping *cswizzle,
2265 uint32_t region_count,
2266 const VkBufferImageCopy2KHR *regions)
2267 {
2268 /* Since we can't sample linear images we need to upload the linear
2269 * buffer to a tiled image that we can use as a blit source, which
2270 * is slow.
2271 */
2272 perf_debug("Falling back to blit path for buffer to image copy.\n");
2273
2274 struct v3dv_device *device = cmd_buffer->device;
2275 VkDevice _device = v3dv_device_to_handle(device);
2276 bool handled = true;
2277
2278 /* Allocate memory for the tiled image. Since we copy layer by layer
2279 * we allocate memory to hold a full layer, which is the worst case.
2280 * For that we create a dummy image with that spec, get memory requirements
2281 * for it and use that information to create the memory allocation.
2282 * We will then reuse this memory store for all the regions we want to
2283 * copy.
2284 */
2285 VkImage dummy_image;
2286 VkImageCreateInfo dummy_info = {
2287 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2288 .imageType = VK_IMAGE_TYPE_2D,
2289 .format = src_format,
2290 .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
2291 .mipLevels = 1,
2292 .arrayLayers = 1,
2293 .samples = VK_SAMPLE_COUNT_1_BIT,
2294 .tiling = VK_IMAGE_TILING_OPTIMAL,
2295 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2296 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2297 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2298 .queueFamilyIndexCount = 0,
2299 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2300 };
2301 VkResult result =
2302 v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2303 if (result != VK_SUCCESS)
2304 return handled;
2305
2306 VkMemoryRequirements reqs;
2307 vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2308 v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2309
2310 VkDeviceMemory mem;
2311 VkMemoryAllocateInfo alloc_info = {
2312 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2313 .allocationSize = reqs.size,
2314 .memoryTypeIndex = 0,
2315 };
2316 result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2317 if (result != VK_SUCCESS)
2318 return handled;
2319
2320 v3dv_cmd_buffer_add_private_obj(
2321 cmd_buffer, (uintptr_t)mem,
2322 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2323
2324 /* Obtain the layer count.
2325 *
2326 * If we are batching (region_count > 1) all our regions have the same
2327 * image subresource so we can take this from the first region.
2328 */
2329 uint32_t num_layers;
2330 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
2331 num_layers = regions[0].imageSubresource.layerCount;
2332 else
2333 num_layers = regions[0].imageExtent.depth;
2334 assert(num_layers > 0);
2335
2336 /* Sanity check: we can only batch multiple regions together if they have
2337 * the same framebuffer (so the same layer).
2338 */
2339 assert(num_layers == 1 || region_count == 1);
2340
2341 const uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
2342 const uint32_t block_height = vk_format_get_blockheight(image->vk.format);
2343
2344 /* Copy regions by uploading each region to a temporary tiled image using
2345 * the memory we have just allocated as storage.
2346 */
2347 for (uint32_t r = 0; r < region_count; r++) {
2348 const VkBufferImageCopy2KHR *region = &regions[r];
2349
2350 /* Obtain the 2D buffer region spec */
2351 uint32_t buf_width, buf_height;
2352 if (region->bufferRowLength == 0)
2353 buf_width = region->imageExtent.width;
2354 else
2355 buf_width = region->bufferRowLength;
2356
2357 if (region->bufferImageHeight == 0)
2358 buf_height = region->imageExtent.height;
2359 else
2360 buf_height = region->bufferImageHeight;
2361
2362 /* If the image is compressed, the bpp refers to blocks, not pixels */
2363 buf_width = buf_width / block_width;
2364 buf_height = buf_height / block_height;
2365
2366 for (uint32_t i = 0; i < num_layers; i++) {
2367 /* Create the tiled image */
2368 VkImageCreateInfo image_info = {
2369 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2370 .imageType = VK_IMAGE_TYPE_2D,
2371 .format = src_format,
2372 .extent = { buf_width, buf_height, 1 },
2373 .mipLevels = 1,
2374 .arrayLayers = 1,
2375 .samples = VK_SAMPLE_COUNT_1_BIT,
2376 .tiling = VK_IMAGE_TILING_OPTIMAL,
2377 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2378 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2379 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2380 .queueFamilyIndexCount = 0,
2381 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2382 };
2383
2384 VkImage buffer_image;
2385 VkResult result =
2386 v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
2387 &buffer_image);
2388 if (result != VK_SUCCESS)
2389 return handled;
2390
2391 v3dv_cmd_buffer_add_private_obj(
2392 cmd_buffer, (uintptr_t)buffer_image,
2393 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
2394
2395 result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
2396 if (result != VK_SUCCESS)
2397 return handled;
2398
2399 /* Upload buffer contents for the selected layer */
2400 const VkDeviceSize buf_offset_bytes =
2401 region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
2402 const VkBufferImageCopy2KHR buffer_image_copy = {
2403 .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR,
2404 .bufferOffset = buf_offset_bytes,
2405 .bufferRowLength = region->bufferRowLength / block_width,
2406 .bufferImageHeight = region->bufferImageHeight / block_height,
2407 .imageSubresource = {
2408 .aspectMask = aspect,
2409 .mipLevel = 0,
2410 .baseArrayLayer = 0,
2411 .layerCount = 1,
2412 },
2413 .imageOffset = { 0, 0, 0 },
2414 .imageExtent = { buf_width, buf_height, 1 }
2415 };
2416 handled =
2417 create_tiled_image_from_buffer(cmd_buffer,
2418 v3dv_image_from_handle(buffer_image),
2419 buffer, &buffer_image_copy);
2420 if (!handled) {
2421 * This is unexpected, we should have set up the upload to be
2422 * conformant to a TFU or TLB copy.
2423 */
2424 unreachable("Unable to copy buffer to image through TLB");
2425 return false;
2426 }
2427
2428 /* Blit-copy the requested image extent from the buffer image to the
2429 * destination image.
2430 *
2431 * Since we are copying, the blit must use the same format on the
2432 * destination and source images to avoid format conversions. The
2433 * only exception is copying stencil, which we upload to an R8UI source
2434 * image, but we need to blit to an S8D24 destination (the only
2435 * stencil format we support).
2436 */
2437 const VkImageBlit2KHR blit_region = {
2438 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
2439 .srcSubresource = {
2440 .aspectMask = aspect,
2441 .mipLevel = 0,
2442 .baseArrayLayer = 0,
2443 .layerCount = 1,
2444 },
2445 .srcOffsets = {
2446 { 0, 0, 0 },
2447 { region->imageExtent.width, region->imageExtent.height, 1 },
2448 },
2449 .dstSubresource = {
2450 .aspectMask = aspect,
2451 .mipLevel = region->imageSubresource.mipLevel,
2452 .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
2453 .layerCount = 1,
2454 },
2455 .dstOffsets = {
2456 {
2457 DIV_ROUND_UP(region->imageOffset.x, block_width),
2458 DIV_ROUND_UP(region->imageOffset.y, block_height),
2459 region->imageOffset.z + i,
2460 },
2461 {
2462 DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
2463 block_width),
2464 DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
2465 block_height),
2466 region->imageOffset.z + i + 1,
2467 },
2468 },
2469 };
2470
2471 handled = blit_shader(cmd_buffer,
2472 image, dst_format,
2473 v3dv_image_from_handle(buffer_image), src_format,
2474 cmask, cswizzle,
2475 &blit_region, VK_FILTER_NEAREST, true);
2476 if (!handled) {
2477 /* This is unexpected, we should have a supported blit spec */
2478 unreachable("Unable to blit buffer to destination image");
2479 return false;
2480 }
2481 }
2482 }
2483
2484 return handled;
2485 }
2486
2487 /**
2488 * Returns true if the implementation supports the requested operation (even if
2489 * it failed to process it, for example, due to an out-of-memory error).
2490 */
2491 static bool
2492 copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
2493 struct v3dv_image *image,
2494 struct v3dv_buffer *buffer,
2495 uint32_t region_count,
2496 const VkBufferImageCopy2KHR *regions,
2497 bool use_texel_buffer)
2498 {
2499 /* We can only call this with region_count > 1 if we can batch the regions
2500 * together, in which case they share the same image subresource, and so
2501 * the same aspect.
2502 */
2503 VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
2504
2505 /* Generally, the bpp of the data in the buffer matches that of the
2506 * destination image. The exception is the case where we are uploading
2507 * stencil (8bpp) to a combined d24s8 image (32bpp).
2508 */
2509 uint32_t buf_bpp = image->cpp;
2510
2511 /* We are about to upload the buffer data to an image so we can then
2512 * blit that to our destination region. Because we are going to implement
2513 * the copy as a blit, we want our blit source and destination formats to be
2514 * the same (to avoid any format conversions), so we choose a canonical
2515 * format that matches the destination image bpp.
2516 */
2517 VkComponentMapping ident_swizzle = {
2518 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
2519 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
2520 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
2521 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
2522 };
2523
2524 VkComponentMapping cswizzle = ident_swizzle;
2525 VkColorComponentFlags cmask = 0; /* Write all components */
2526 VkFormat src_format;
2527 VkFormat dst_format;
2528 switch (buf_bpp) {
2529 case 16:
2530 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2531 src_format = VK_FORMAT_R32G32B32A32_UINT;
2532 dst_format = src_format;
2533 break;
2534 case 8:
2535 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2536 src_format = VK_FORMAT_R16G16B16A16_UINT;
2537 dst_format = src_format;
2538 break;
2539 case 4:
2540 switch (aspect) {
2541 case VK_IMAGE_ASPECT_COLOR_BIT:
2542 src_format = VK_FORMAT_R8G8B8A8_UINT;
2543 dst_format = src_format;
2544 break;
2545 case VK_IMAGE_ASPECT_DEPTH_BIT:
2546 assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
2547 image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
2548 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
2549 src_format = VK_FORMAT_R8G8B8A8_UINT;
2550 dst_format = src_format;
2551 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2552
2553 /* For D24 formats, the Vulkan spec states that the depth component
2554 * in the buffer is stored in the 24-LSB, but V3D wants it in the
2555 * 24-MSB.
2556 */
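/* Illustration: for a buffer texel with depth D in bits 0-23, the
 * swizzle below routes src.rgb into dst.gba, effectively writing
 * D << 8 into the image, while the cmask keeps byte 0 (which holds
 * stencil on D24S8 images) untouched.
 */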
2557 if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
2558 image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2559 cmask = VK_COLOR_COMPONENT_G_BIT |
2560 VK_COLOR_COMPONENT_B_BIT |
2561 VK_COLOR_COMPONENT_A_BIT;
2562 cswizzle.r = VK_COMPONENT_SWIZZLE_R;
2563 cswizzle.g = VK_COMPONENT_SWIZZLE_R;
2564 cswizzle.b = VK_COMPONENT_SWIZZLE_G;
2565 cswizzle.a = VK_COMPONENT_SWIZZLE_B;
2566 }
2567 break;
2568 case VK_IMAGE_ASPECT_STENCIL_BIT:
2569 /* Since we don't support separate stencil, this is always a stencil
2570 * copy to a combined depth/stencil image. Because we don't support
2571 * separate stencil images, we interpret the buffer data as a
2572 * color R8UI image, and implement the blit as a compatible color
2573 * blit to an RGBA8UI destination masking out writes to components
2574 * GBA (which map to the D24 component of a S8D24 image).
2575 */
2576 assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
2577 buf_bpp = 1;
2578 src_format = VK_FORMAT_R8_UINT;
2579 dst_format = VK_FORMAT_R8G8B8A8_UINT;
2580 cmask = VK_COLOR_COMPONENT_R_BIT;
2581 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2582 break;
2583 default:
2584 unreachable("unsupported aspect");
2585 return false;
2586 };
2587 break;
2588 case 2:
2589 aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2590 src_format = VK_FORMAT_R16_UINT;
2591 dst_format = src_format;
2592 break;
2593 case 1:
2594 assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2595 src_format = VK_FORMAT_R8_UINT;
2596 dst_format = src_format;
2597 break;
2598 default:
2599 unreachable("unsupported bit-size");
2600 return false;
2601 }
2602
2603 if (use_texel_buffer) {
2604 return texel_buffer_shader_copy(cmd_buffer, aspect, image,
2605 dst_format, src_format,
2606 buffer, buf_bpp,
2607 cmask, &cswizzle,
2608 region_count, regions);
2609 } else {
2610 return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
2611 dst_format, src_format,
2612 buffer, buf_bpp,
2613 cmask, &cswizzle,
2614 region_count, regions);
2615 }
2616 }
2617
2618 /**
2619 * Returns true if the implementation supports the requested operation (even if
2620 * it failed to process it, for example, due to an out-of-memory error).
2621 */
2622 static bool
2623 copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
2624 struct v3dv_image *image,
2625 struct v3dv_buffer *buffer,
2626 const VkBufferImageCopy2KHR *region)
2627 {
2628 /* FIXME */
2629 if (vk_format_is_depth_or_stencil(image->vk.format))
2630 return false;
2631
2632 if (vk_format_is_compressed(image->vk.format))
2633 return false;
2634
2635 if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
2636 return false;
2637
2638 uint32_t buffer_width, buffer_height;
2639 if (region->bufferRowLength == 0)
2640 buffer_width = region->imageExtent.width;
2641 else
2642 buffer_width = region->bufferRowLength;
2643
2644 if (region->bufferImageHeight == 0)
2645 buffer_height = region->imageExtent.height;
2646 else
2647 buffer_height = region->bufferImageHeight;
2648
2649 uint32_t buffer_stride = buffer_width * image->cpp;
2650 uint32_t buffer_layer_stride = buffer_stride * buffer_height;
2651
2652 uint32_t num_layers;
2653 if (image->vk.image_type != VK_IMAGE_TYPE_3D)
2654 num_layers = region->imageSubresource.layerCount;
2655 else
2656 num_layers = region->imageExtent.depth;
2657 assert(num_layers > 0);
2658
2659 struct v3dv_job *job =
2660 v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
2661 V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
2662 cmd_buffer, -1);
2663 if (!job)
2664 return true;
2665
2666 job->cpu.copy_buffer_to_image.image = image;
2667 job->cpu.copy_buffer_to_image.buffer = buffer;
2668 job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
2669 job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
2670 job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
2671 job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
2672 job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
2673 job->cpu.copy_buffer_to_image.mip_level =
2674 region->imageSubresource.mipLevel;
2675 job->cpu.copy_buffer_to_image.base_layer =
2676 region->imageSubresource.baseArrayLayer;
2677 job->cpu.copy_buffer_to_image.layer_count = num_layers;
2678
2679 list_addtail(&job->list_link, &cmd_buffer->jobs);
2680
2681 return true;
2682 }
2683
2684 VKAPI_ATTR void VKAPI_CALL
2685 v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
2686 const VkCopyBufferToImageInfo2KHR *info)
2687 {
2688 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
2689 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
2690 V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
2691
2692 assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
2693
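/* Copy regions in batches, trying the fastest paths first and falling
 * back progressively: TFU, TLB, texel buffer shader (batched), CPU job,
 * and finally the blit shader.
 */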
2694 uint32_t r = 0;
2695 while (r < info->regionCount) {
2696 /* The TFU and TLB paths can only copy one region at a time and the region
2697 * needs to start at the origin. We try these first for the common case
2698 * where we are copying full images, since they should be the fastest.
2699 */
2700 uint32_t batch_size = 1;
2701 if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
2702 goto handled;
2703
2704 if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
2705 goto handled;
2706
2707 /* Otherwise, we are copying subrects, so we fall back to copying
2708 * via shader and texel buffers and we try to batch the regions
2709 * if possible. We can only batch copies if they have the same
2710 * framebuffer spec, which is mostly determined by the image
2711 * subresource of the region.
2712 */
2713 const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
2714 for (uint32_t s = r + 1; s < info->regionCount; s++) {
2715 const VkImageSubresourceLayers *rsc_s =
2716 &info->pRegions[s].imageSubresource;
2717
2718 if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
2719 break;
2720
2721 /* For 3D images we also need to check the depth extent */
2722 if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
2723 info->pRegions[s].imageExtent.depth !=
2724 info->pRegions[r].imageExtent.depth) {
2725 break;
2726 }
2727
2728 batch_size++;
2729 }
2730
2731 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
2732 batch_size, &info->pRegions[r], true)) {
2733 goto handled;
2734 }
2735
2736 /* If we still could not copy, fall back to slower paths.
2737 *
2738 * FIXME: we could try to batch these too, but since they are bound to be
2739 * slow it might not be worth it and we should instead put more effort
2740 * in handling more cases with the other paths.
2741 */
2742 if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
2743 &info->pRegions[r])) {
2744 batch_size = 1;
2745 goto handled;
2746 }
2747
2748 if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
2749 batch_size, &info->pRegions[r], false)) {
2750 goto handled;
2751 }
2752
2753 unreachable("Unsupported buffer to image copy.");
2754
2755 handled:
2756 r += batch_size;
2757 }
2758 }
2759
2760 static void
2761 compute_blit_3d_layers(const VkOffset3D *offsets,
2762 uint32_t *min_layer, uint32_t *max_layer,
2763 bool *mirror_z);
2764
2765 /**
2766 * Returns true if the implementation supports the requested operation (even if
2767 * it failed to process it, for example, due to an out-of-memory error).
2768 *
2769 * The TFU blit path doesn't handle scaling so the blit filter parameter can
2770 * be ignored.
2771 */
2772 static bool
2773 blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
2774 struct v3dv_image *dst,
2775 struct v3dv_image *src,
2776 const VkImageBlit2KHR *region)
2777 {
2778 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
2779 assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
2780
2781 /* Format must match */
2782 if (src->vk.format != dst->vk.format)
2783 return false;
2784
2785 /* Destination can't be raster format */
2786 if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
2787 return false;
2788
2789 /* Source region must start at (0,0) */
2790 if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
2791 return false;
2792
2793 /* Destination image must be complete */
2794 if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
2795 return false;
2796
2797 const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
2798 const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
2799 const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
2800 if (region->dstOffsets[1].x < dst_width - 1 ||
2801 region->dstOffsets[1].y < dst_height - 1) {
2802 return false;
2803 }
2804
2805 /* No XY scaling */
2806 if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
2807 region->srcOffsets[1].y != region->dstOffsets[1].y) {
2808 return false;
2809 }
2810
2811 /* If the format is D24S8 both aspects need to be copied, since the TFU
2812 * can't be programmed to copy only one aspect of the image.
2813 */
2814 if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
2815 const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
2816 VK_IMAGE_ASPECT_STENCIL_BIT;
2817 if (region->dstSubresource.aspectMask != ds_aspects)
2818 return false;
2819 }
2820
2821 /* Our TFU blits only handle exact copies (they require the same format
2822 * on input and output, no scaling, etc.), so there are no pixel format
2823 * conversions and we can rewrite the format to use one that is TFU
2824 * compatible based on its texel size.
2825 */
2826 const struct v3dv_format *format =
2827 v3dv_get_compatible_tfu_format(cmd_buffer->device,
2828 dst->cpp, NULL);
2829
2830 /* Emit a TFU job for each layer to blit */
2831 assert(region->dstSubresource.layerCount ==
2832 region->srcSubresource.layerCount);
2833
2834 uint32_t min_dst_layer;
2835 uint32_t max_dst_layer;
2836 bool dst_mirror_z = false;
2837 if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
2838 compute_blit_3d_layers(region->dstOffsets,
2839 &min_dst_layer, &max_dst_layer,
2840 &dst_mirror_z);
2841 } else {
2842 min_dst_layer = region->dstSubresource.baseArrayLayer;
2843 max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
2844 }
2845
2846 uint32_t min_src_layer;
2847 uint32_t max_src_layer;
2848 bool src_mirror_z = false;
2849 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
2850 compute_blit_3d_layers(region->srcOffsets,
2851 &min_src_layer, &max_src_layer,
2852 &src_mirror_z);
2853 } else {
2854 min_src_layer = region->srcSubresource.baseArrayLayer;
2855 max_src_layer = min_src_layer + region->srcSubresource.layerCount;
2856 }
2857
2858 /* No Z scaling for 3D images (for non-3D images both src and dst must
2859 * have the same layerCount).
2860 */
2861 if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
2862 return false;
2863
2864 const uint32_t layer_count = max_dst_layer - min_dst_layer;
2865 const uint32_t src_mip_level = region->srcSubresource.mipLevel;
2866 for (uint32_t i = 0; i < layer_count; i++) {
2867 /* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
2868 * only involves reversing the order of the slices.
2869 */
2870 const uint32_t dst_layer =
2871 dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
2872 const uint32_t src_layer =
2873 src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
2874 v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
2875 (cmd_buffer, dst, dst_mip_level, dst_layer,
2876 src, src_mip_level, src_layer,
2877 dst_width, dst_height, format);
2878 }
2879
2880 return true;
2881 }
2882
2883 static bool
2884 format_needs_software_int_clamp(VkFormat format)
2885 {
2886 switch (format) {
2887 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
2888 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
2889 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
2890 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
2891 return true;
2892 default:
2893 return false;
2894 };
2895 }
2896
2897 static void
2898 get_blit_pipeline_cache_key(VkFormat dst_format,
2899 VkFormat src_format,
2900 VkColorComponentFlags cmask,
2901 VkSampleCountFlagBits dst_samples,
2902 VkSampleCountFlagBits src_samples,
2903 uint8_t *key)
2904 {
2905 memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
2906
2907 uint32_t *p = (uint32_t *) key;
2908
2909 *p = dst_format;
2910 p++;
2911
2912 /* Generally, when blitting from a larger format to a smaller format
2913 * the hardware takes care of clamping the source to the RT range.
2914 * Specifically, for integer formats, this is done by using
2915 * V3D_RENDER_TARGET_CLAMP_INT in the render target setup; however, this
2916 * clamps to the bit-size of the render type, and some formats, such as
2917 * rgb10a2_uint, have a 16-bit type, so it won't do what we need and we
2918 * have to clamp in software. In these cases, we need to amend the blit
2919 * shader with clamp code that depends on both the src and dst formats, so
2920 * we need the src format to be part of the key.
2921 */
2922 *p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
2923 p++;
2924
2925 *p = cmask;
2926 p++;
2927
2928 *p = (dst_samples << 8) | src_samples;
2929 p++;
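/* Both sample counts fit in 8 bits each (VkSampleCountFlagBits values go
 * up to 64), so they can share a single 32-bit slot of the key.
 */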
2930
2931 assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
2932 }
2933
2934 static bool
2935 create_blit_render_pass(struct v3dv_device *device,
2936 VkFormat dst_format,
2937 VkFormat src_format,
2938 VkRenderPass *pass_load,
2939 VkRenderPass *pass_no_load)
2940 {
2941 const bool is_color_blit = vk_format_is_color(dst_format);
2942
2943 /* Attachment load operation is specified below */
2944 VkAttachmentDescription att = {
2945 .format = dst_format,
2946 .samples = VK_SAMPLE_COUNT_1_BIT,
2947 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
2948 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2949 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
2950 };
2951
2952 VkAttachmentReference att_ref = {
2953 .attachment = 0,
2954 .layout = VK_IMAGE_LAYOUT_GENERAL,
2955 };
2956
2957 VkSubpassDescription subpass = {
2958 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
2959 .inputAttachmentCount = 0,
2960 .colorAttachmentCount = is_color_blit ? 1 : 0,
2961 .pColorAttachments = is_color_blit ? &att_ref : NULL,
2962 .pResolveAttachments = NULL,
2963 .pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
2964 .preserveAttachmentCount = 0,
2965 .pPreserveAttachments = NULL,
2966 };
2967
2968 VkRenderPassCreateInfo info = {
2969 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
2970 .attachmentCount = 1,
2971 .pAttachments = &att,
2972 .subpassCount = 1,
2973 .pSubpasses = &subpass,
2974 .dependencyCount = 0,
2975 .pDependencies = NULL,
2976 };
2977
2978 VkResult result;
2979 att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
2980 result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
2981 &info, &device->vk.alloc, pass_load);
2982 if (result != VK_SUCCESS)
2983 return false;
2984
2985 att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
2986 result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
2987 &info, &device->vk.alloc, pass_no_load);
2988 return result == VK_SUCCESS;
2989 }
2990
2991 static nir_ssa_def *
2992 gen_rect_vertices(nir_builder *b)
2993 {
2994 nir_ssa_def *vertex_id = nir_load_vertex_id(b);
2995
2996 /* vertex 0: -1.0, -1.0
2997 * vertex 1: -1.0, 1.0
2998 * vertex 2: 1.0, -1.0
2999 * vertex 3: 1.0, 1.0
3000 *
3001 * so:
3002 *
3003 * channel 0 is vertex_id < 2 ? -1.0 : 1.0
3004 * channel 1 is vertex_id & 1 ? 1.0 : -1.0
3005 */
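/* For example, vertex_id = 2: 2 < 2 is false, so channel 0 is 1.0, and
 * 2 & 1 == 0, so channel 1 is -1.0, giving the corner (1.0, -1.0).
 */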
3006
3007 nir_ssa_def *one = nir_imm_int(b, 1);
3008 nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
3009 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3010
3011 nir_ssa_def *comp[4];
3012 comp[0] = nir_bcsel(b, c0cmp,
3013 nir_imm_float(b, -1.0f),
3014 nir_imm_float(b, 1.0f));
3015
3016 comp[1] = nir_bcsel(b, c1cmp,
3017 nir_imm_float(b, 1.0f),
3018 nir_imm_float(b, -1.0f));
3019 comp[2] = nir_imm_float(b, 0.0f);
3020 comp[3] = nir_imm_float(b, 1.0f);
3021 return nir_vec(b, comp, 4);
3022 }
3023
3024 static nir_ssa_def *
3025 gen_tex_coords(nir_builder *b)
3026 {
3027 nir_ssa_def *tex_box =
3028 nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3029
3030 nir_ssa_def *tex_z =
3031 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3032
3033 nir_ssa_def *vertex_id = nir_load_vertex_id(b);
3034
3035 /* vertex 0: src0_x, src0_y
3036 * vertex 1: src0_x, src1_y
3037 * vertex 2: src1_x, src0_y
3038 * vertex 3: src1_x, src1_y
3039 *
3040 * So:
3041 *
3042 * channel 0 is vertex_id < 2 ? src0_x : src1_x
3043 * channel 1 is vertex_id & 1 ? src1_y : src0_y
3044 */
3045
3046 nir_ssa_def *one = nir_imm_int(b, 1);
3047 nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
3048 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3049
3050 nir_ssa_def *comp[4];
3051 comp[0] = nir_bcsel(b, c0cmp,
3052 nir_channel(b, tex_box, 0),
3053 nir_channel(b, tex_box, 2));
3054
3055 comp[1] = nir_bcsel(b, c1cmp,
3056 nir_channel(b, tex_box, 3),
3057 nir_channel(b, tex_box, 1));
3058 comp[2] = tex_z;
3059 comp[3] = nir_imm_float(b, 1.0f);
3060 return nir_vec(b, comp, 4);
3061 }
3062
3063 static nir_ssa_def *
3064 build_nir_tex_op_read(struct nir_builder *b,
3065 nir_ssa_def *tex_pos,
3066 enum glsl_base_type tex_type,
3067 enum glsl_sampler_dim dim)
3068 {
3069 assert(dim != GLSL_SAMPLER_DIM_MS);
3070
3071 const struct glsl_type *sampler_type =
3072 glsl_sampler_type(dim, false, false, tex_type);
3073 nir_variable *sampler =
3074 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3075 sampler->data.descriptor_set = 0;
3076 sampler->data.binding = 0;
3077
3078 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3079 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
3080 tex->sampler_dim = dim;
3081 tex->op = nir_texop_tex;
3082 tex->src[0].src_type = nir_tex_src_coord;
3083 tex->src[0].src = nir_src_for_ssa(tex_pos);
3084 tex->src[1].src_type = nir_tex_src_texture_deref;
3085 tex->src[1].src = nir_src_for_ssa(tex_deref);
3086 tex->src[2].src_type = nir_tex_src_sampler_deref;
3087 tex->src[2].src = nir_src_for_ssa(tex_deref);
3088 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3089 tex->is_array = glsl_sampler_type_is_array(sampler_type);
3090 tex->coord_components = tex_pos->num_components;
3091
3092 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
3093 nir_builder_instr_insert(b, &tex->instr);
3094 return &tex->dest.ssa;
3095 }
3096
3097 static nir_ssa_def *
3098 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
3099 nir_variable *sampler,
3100 nir_ssa_def *tex_deref,
3101 enum glsl_base_type tex_type,
3102 nir_ssa_def *tex_pos,
3103 nir_ssa_def *sample_idx)
3104 {
3105 nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
3106 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
3107 tex->op = nir_texop_txf_ms;
3108 tex->src[0].src_type = nir_tex_src_coord;
3109 tex->src[0].src = nir_src_for_ssa(tex_pos);
3110 tex->src[1].src_type = nir_tex_src_texture_deref;
3111 tex->src[1].src = nir_src_for_ssa(tex_deref);
3112 tex->src[2].src_type = nir_tex_src_sampler_deref;
3113 tex->src[2].src = nir_src_for_ssa(tex_deref);
3114 tex->src[3].src_type = nir_tex_src_ms_index;
3115 tex->src[3].src = nir_src_for_ssa(sample_idx);
3116 tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
3117 tex->is_array = false;
3118 tex->coord_components = tex_pos->num_components;
3119
3120 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
3121 nir_builder_instr_insert(b, &tex->instr);
3122 return &tex->dest.ssa;
3123 }
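/* The nir_texop_txf_ms instruction built above corresponds roughly to a
 * GLSL texel fetch on a multisampled sampler:
 *
 *    vec4 s = texelFetch(s_tex, ivec2(tex_pos), int(sample_idx));
 *
 * (Illustrative mapping only; the driver emits the NIR form directly.)
 */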
3124
3125 /* Fetches all samples at the given position and averages them */
3126 static nir_ssa_def *
3127 build_nir_tex_op_ms_resolve(struct nir_builder *b,
3128 nir_ssa_def *tex_pos,
3129 enum glsl_base_type tex_type,
3130 VkSampleCountFlagBits src_samples)
3131 {
3132 assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
3133 const struct glsl_type *sampler_type =
3134 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3135 nir_variable *sampler =
3136 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3137 sampler->data.descriptor_set = 0;
3138 sampler->data.binding = 0;
3139
3140 const bool is_int = glsl_base_type_is_integer(tex_type);
3141
3142 nir_ssa_def *tmp = NULL;
3143 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3144 for (uint32_t i = 0; i < src_samples; i++) {
3145 nir_ssa_def *s =
3146 build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3147 tex_type, tex_pos,
3148 nir_imm_int(b, i));
3149
3150 /* For integer formats, the multisample resolve operation is expected to
3151 * return the value of one of the samples, so we just return the first one.
3152 */
3153 if (is_int)
3154 return s;
3155
3156 tmp = i == 0 ? s : nir_fadd(b, tmp, s);
3157 }
3158
3159 assert(!is_int);
3160 return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
3161 }
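/* For float formats the loop above amounts to a box filter over the
 * samples. A GLSL sketch of the same resolve (illustrative only):
 *
 *    vec4 color = vec4(0.0);
 *    for (int i = 0; i < src_samples; i++)
 *       color += texelFetch(s_tex, pos, i);
 *    color *= 1.0 / float(src_samples);
 *
 * Integer formats instead return sample 0, as noted in the loop.
 */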
3162
3163 /* Fetches the current sample (gl_SampleID) at the given position */
3164 static nir_ssa_def *
3165 build_nir_tex_op_ms_read(struct nir_builder *b,
3166 nir_ssa_def *tex_pos,
3167 enum glsl_base_type tex_type)
3168 {
3169 const struct glsl_type *sampler_type =
3170 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
3171 nir_variable *sampler =
3172 nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
3173 sampler->data.descriptor_set = 0;
3174 sampler->data.binding = 0;
3175
3176 nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
3177
3178 return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
3179 tex_type, tex_pos,
3180 nir_load_sample_id(b));
3181 }
3182
3183 static nir_ssa_def *
3184 build_nir_tex_op(struct nir_builder *b,
3185 struct v3dv_device *device,
3186 nir_ssa_def *tex_pos,
3187 enum glsl_base_type tex_type,
3188 VkSampleCountFlagBits dst_samples,
3189 VkSampleCountFlagBits src_samples,
3190 enum glsl_sampler_dim dim)
3191 {
3192 switch (dim) {
3193 case GLSL_SAMPLER_DIM_MS:
3194 assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
3195 /* For multisampled texture sources we need to use fetching instead of
3196 * normalized texture coordinates. We already configured our blit
3197 * coordinates to be in texel units, but here we still need to convert
3198 * them from floating point to integer.
3199 */
3200 tex_pos = nir_f2i32(b, tex_pos);
3201
3202 if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
3203 return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
3204 else
3205 return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
3206 default:
3207 assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
3208 return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
3209 }
3210 }
3211
3212 static nir_shader *
3213 get_blit_vs()
3214 {
3215 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3216 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
3217 "meta blit vs");
3218
3219 const struct glsl_type *vec4 = glsl_vec4_type();
3220
3221 nir_variable *vs_out_pos =
3222 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
3223 vs_out_pos->data.location = VARYING_SLOT_POS;
3224
3225 nir_variable *vs_out_tex_coord =
3226 nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
3227 vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
3228 vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
3229
3230 nir_ssa_def *pos = gen_rect_vertices(&b);
3231 nir_store_var(&b, vs_out_pos, pos, 0xf);
3232
3233 nir_ssa_def *tex_coord = gen_tex_coords(&b);
3234 nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
3235
3236 return b.shader;
3237 }
3238
3239 static uint32_t
3240 get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
3241 {
3242 switch (sampler_dim) {
3243 case GLSL_SAMPLER_DIM_1D: return 0x1;
3244 case GLSL_SAMPLER_DIM_2D: return 0x3;
3245 case GLSL_SAMPLER_DIM_MS: return 0x3;
3246 case GLSL_SAMPLER_DIM_3D: return 0x7;
3247 default:
3248 unreachable("invalid sampler dim");
3249 }
3250 }
3251
3252 static nir_shader *
3253 get_color_blit_fs(struct v3dv_device *device,
3254 VkFormat dst_format,
3255 VkFormat src_format,
3256 VkSampleCountFlagBits dst_samples,
3257 VkSampleCountFlagBits src_samples,
3258 enum glsl_sampler_dim sampler_dim)
3259 {
3260 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
3261 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
3262 "meta blit fs");
3263
3264 const struct glsl_type *vec4 = glsl_vec4_type();
3265
3266 nir_variable *fs_in_tex_coord =
3267 nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
3268 fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;
3269
3270 const struct glsl_type *fs_out_type =
3271 vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
3272 vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
3273 glsl_vec4_type();
3274
3275 enum glsl_base_type src_base_type =
3276 vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
3277 vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
3278 GLSL_TYPE_FLOAT;
3279
3280 nir_variable *fs_out_color =
3281 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
3282 fs_out_color->data.location = FRAG_RESULT_DATA0;
3283
3284 nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
3285 const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
3286 tex_coord = nir_channels(&b, tex_coord, channel_mask);
3287
3288 nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
3289 dst_samples, src_samples, sampler_dim);
3290
3291 /* For integer textures, if the bit-size of the destination is too small to
3292 * hold source value, Vulkan (CTS) expects the implementation to clamp to the
3293 * maximum value the destination can hold. The hardware can clamp to the
3294 * render target type, which usually matches the component bit-size, but
3295 * there are some cases that won't match, such as rgb10a2, which has a 16-bit
3296 * render target type, so in these cases we need to clamp manually.
3297 */
3298 if (format_needs_software_int_clamp(dst_format)) {
3299 assert(vk_format_is_int(dst_format));
3300 enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
3301 enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
3302
3303 nir_ssa_def *c[4];
3304 for (uint32_t i = 0; i < 4; i++) {
3305 c[i] = nir_channel(&b, color, i);
3306
3307 const uint32_t src_bit_size =
3308 util_format_get_component_bits(src_pformat,
3309 UTIL_FORMAT_COLORSPACE_RGB,
3310 i);
3311 const uint32_t dst_bit_size =
3312 util_format_get_component_bits(dst_pformat,
3313 UTIL_FORMAT_COLORSPACE_RGB,
3314 i);
3315
3316 if (dst_bit_size >= src_bit_size)
3317 continue;
3318
3319 assert(dst_bit_size > 0);
3320 if (util_format_is_pure_uint(dst_pformat)) {
3321 nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
3322 c[i] = nir_umin(&b, c[i], max);
3323 } else {
3324 nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
3325 nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
3326 c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
3327 }
3328 }
3329
3330 color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
3331 }
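   /* Worked example of the clamp above: for a blit from VK_FORMAT_R16_UINT
    * to VK_FORMAT_A2B10G10R10_UINT_PACK32 (a format pair assumed for
    * illustration), a 10-bit destination channel holds at most
    * (1 << 10) - 1 = 1023, but the hardware clamps to its 16-bit render
    * target type, so a source value of 4000 must be clamped to 1023 in the
    * shader.
    */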
3332
3333 nir_store_var(&b, fs_out_color, color, 0xf);
3334
3335 return b.shader;
3336 }
3337
3338 static bool
3339 create_pipeline(struct v3dv_device *device,
3340 struct v3dv_render_pass *pass,
3341 struct nir_shader *vs_nir,
3342 struct nir_shader *gs_nir,
3343 struct nir_shader *fs_nir,
3344 const VkPipelineVertexInputStateCreateInfo *vi_state,
3345 const VkPipelineDepthStencilStateCreateInfo *ds_state,
3346 const VkPipelineColorBlendStateCreateInfo *cb_state,
3347 const VkPipelineMultisampleStateCreateInfo *ms_state,
3348 const VkPipelineLayout layout,
3349 VkPipeline *pipeline)
3350 {
3351 struct vk_shader_module vs_m;
3352 struct vk_shader_module gs_m;
3353 struct vk_shader_module fs_m;
3354
3355 uint32_t num_stages = gs_nir ? 3 : 2;
3356
3357 v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
3358 v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
3359
3360 VkPipelineShaderStageCreateInfo stages[3] = {
3361 {
3362 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3363 .stage = VK_SHADER_STAGE_VERTEX_BIT,
3364 .module = vk_shader_module_to_handle(&vs_m),
3365 .pName = "main",
3366 },
3367 {
3368 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3369 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
3370 .module = vk_shader_module_to_handle(&fs_m),
3371 .pName = "main",
3372 },
3373 {
3374 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3375 .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
3376 .module = VK_NULL_HANDLE,
3377 .pName = "main",
3378 },
3379 };
3380
3381 if (gs_nir) {
3382 v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
3383 stages[2].module = vk_shader_module_to_handle(&gs_m);
3384 }
3385
3386 VkGraphicsPipelineCreateInfo info = {
3387 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
3388
3389 .stageCount = num_stages,
3390 .pStages = stages,
3391
3392 .pVertexInputState = vi_state,
3393
3394 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
3395 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
3396 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3397 .primitiveRestartEnable = false,
3398 },
3399
3400 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
3401 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
3402 .viewportCount = 1,
3403 .scissorCount = 1,
3404 },
3405
3406 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
3407 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
3408 .rasterizerDiscardEnable = false,
3409 .polygonMode = VK_POLYGON_MODE_FILL,
3410 .cullMode = VK_CULL_MODE_NONE,
3411 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
3412 .depthBiasEnable = false,
3413 },
3414
3415 .pMultisampleState = ms_state,
3416
3417 .pDepthStencilState = ds_state,
3418
3419 .pColorBlendState = cb_state,
3420
3421 /* This meta pipeline declares all state in the list below as dynamic.
3422 * As a consequence, vkCmdBindPipeline writes no dynamic state
3423 * to the cmd buffer. Therefore, at the end of the meta operation,
3424 * we need only restore the dynamic state that was set via vkCmdSet*.
3425 */
3426 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
3427 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
3428 .dynamicStateCount = 8,
3429 .pDynamicStates = (VkDynamicState[]) {
3430 VK_DYNAMIC_STATE_VIEWPORT,
3431 VK_DYNAMIC_STATE_SCISSOR,
3432 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
3433 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
3434 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
3435 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
3436 VK_DYNAMIC_STATE_DEPTH_BIAS,
3437 VK_DYNAMIC_STATE_LINE_WIDTH,
3438 },
3439 },
3440
3441 .flags = 0,
3442 .layout = layout,
3443 .renderPass = v3dv_render_pass_to_handle(pass),
3444 .subpass = 0,
3445 };
3446
3447 VkResult result =
3448 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
3449 VK_NULL_HANDLE,
3450 1, &info,
3451 &device->vk.alloc,
3452 pipeline);
3453
3454 ralloc_free(vs_nir);
3455 ralloc_free(fs_nir);
3456
3457 return result == VK_SUCCESS;
3458 }
3459
3460 static enum glsl_sampler_dim
3461 get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
3462 {
3463 /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
3464 *
3465 * "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
3466 * VK_IMAGE_TYPE_2D, ..."
3467 */
3468 assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);
3469
3470 switch (type) {
3471 case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
3472 case VK_IMAGE_TYPE_2D:
3473 return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
3474 GLSL_SAMPLER_DIM_MS;
3475 case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
3476 default:
3477 unreachable("Invalid image type");
3478 }
3479 }
3480
3481 static bool
3482 create_blit_pipeline(struct v3dv_device *device,
3483 VkFormat dst_format,
3484 VkFormat src_format,
3485 VkColorComponentFlags cmask,
3486 VkImageType src_type,
3487 VkSampleCountFlagBits dst_samples,
3488 VkSampleCountFlagBits src_samples,
3489 VkRenderPass _pass,
3490 VkPipelineLayout pipeline_layout,
3491 VkPipeline *pipeline)
3492 {
3493 struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
3494
3495 /* We always rewrite depth/stencil blits to compatible color blits */
3496 assert(vk_format_is_color(dst_format));
3497 assert(vk_format_is_color(src_format));
3498
3499 const enum glsl_sampler_dim sampler_dim =
3500 get_sampler_dim(src_type, src_samples);
3501
3502 nir_shader *vs_nir = get_blit_vs();
3503 nir_shader *fs_nir =
3504 get_color_blit_fs(device, dst_format, src_format,
3505 dst_samples, src_samples, sampler_dim);
3506
3507 const VkPipelineVertexInputStateCreateInfo vi_state = {
3508 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
3509 .vertexBindingDescriptionCount = 0,
3510 .vertexAttributeDescriptionCount = 0,
3511 };
3512
3513 VkPipelineDepthStencilStateCreateInfo ds_state = {
3514 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
3515 };
3516
3517 VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
3518 blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
3519 .blendEnable = false,
3520 .colorWriteMask = cmask,
3521 };
3522
3523 const VkPipelineColorBlendStateCreateInfo cb_state = {
3524 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
3525 .logicOpEnable = false,
3526 .attachmentCount = 1,
3527 .pAttachments = blend_att_state
3528 };
3529
3530 const VkPipelineMultisampleStateCreateInfo ms_state = {
3531 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
3532 .rasterizationSamples = dst_samples,
3533 .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
3534 .pSampleMask = NULL,
3535 .alphaToCoverageEnable = false,
3536 .alphaToOneEnable = false,
3537 };
3538
3539 return create_pipeline(device,
3540 pass,
3541 vs_nir, NULL, fs_nir,
3542 &vi_state,
3543 &ds_state,
3544 &cb_state,
3545 &ms_state,
3546 pipeline_layout,
3547 pipeline);
3548 }
3549
3550 /**
3551 * Return a pipeline suitable for blitting the requested aspect given the
3552 * destination and source formats.
3553 */
3554 static bool
3555 get_blit_pipeline(struct v3dv_device *device,
3556 VkFormat dst_format,
3557 VkFormat src_format,
3558 VkColorComponentFlags cmask,
3559 VkImageType src_type,
3560 VkSampleCountFlagBits dst_samples,
3561 VkSampleCountFlagBits src_samples,
3562 struct v3dv_meta_blit_pipeline **pipeline)
3563 {
3564 bool ok = true;
3565
3566 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
3567 get_blit_pipeline_cache_key(dst_format, src_format, cmask,
3568 dst_samples, src_samples, key);
3569 mtx_lock(&device->meta.mtx);
3570 struct hash_entry *entry =
3571 _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
3572 if (entry) {
3573 mtx_unlock(&device->meta.mtx);
3574 *pipeline = entry->data;
3575 return true;
3576 }
3577
3578 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
3579 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3580
3581 if (*pipeline == NULL)
3582 goto fail;
3583
3584 ok = create_blit_render_pass(device, dst_format, src_format,
3585 &(*pipeline)->pass,
3586 &(*pipeline)->pass_no_load);
3587 if (!ok)
3588 goto fail;
3589
3590 /* Create the pipeline using one of the render passes, they are both
3591 * compatible, so we don't care which one we use here.
3592 */
3593 ok = create_blit_pipeline(device,
3594 dst_format,
3595 src_format,
3596 cmask,
3597 src_type,
3598 dst_samples,
3599 src_samples,
3600 (*pipeline)->pass,
3601 device->meta.blit.p_layout,
3602 &(*pipeline)->pipeline);
3603 if (!ok)
3604 goto fail;
3605
3606 memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
3607 _mesa_hash_table_insert(device->meta.blit.cache[src_type],
3608 &(*pipeline)->key, *pipeline);
3609
3610 mtx_unlock(&device->meta.mtx);
3611 return true;
3612
3613 fail:
3614 mtx_unlock(&device->meta.mtx);
3615
3616 VkDevice _device = v3dv_device_to_handle(device);
3617 if (*pipeline) {
3618 if ((*pipeline)->pass)
3619 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
3620 if ((*pipeline)->pass_no_load)
3621 v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
3622 if ((*pipeline)->pipeline)
3623 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
3624 vk_free(&device->vk.alloc, *pipeline);
3625 *pipeline = NULL;
3626 }
3627
3628 return false;
3629 }
3630
3631 static void
3632 compute_blit_box(const VkOffset3D *offsets,
3633 uint32_t image_w, uint32_t image_h,
3634 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
3635 bool *mirror_x, bool *mirror_y)
3636 {
3637 if (offsets[1].x >= offsets[0].x) {
3638 *mirror_x = false;
3639 *x = MIN2(offsets[0].x, image_w - 1);
3640 *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
3641 } else {
3642 *mirror_x = true;
3643 *x = MIN2(offsets[1].x, image_w - 1);
3644 *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
3645 }
3646 if (offsets[1].y >= offsets[0].y) {
3647 *mirror_y = false;
3648 *y = MIN2(offsets[0].y, image_h - 1);
3649 *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
3650 } else {
3651 *mirror_y = true;
3652 *y = MIN2(offsets[1].y, image_h - 1);
3653 *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
3654 }
3655 }
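/* Example of the computation above (values assumed for illustration):
 * with offsets[0] = (64, 32) and offsets[1] = (0, 96) on a 128x128 level,
 * the x range is reversed, so mirror_x = true, x = 0, w = 64, while
 * mirror_y = false, y = 32, h = 64.
 */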
3656
3657 static void
3658 compute_blit_3d_layers(const VkOffset3D *offsets,
3659 uint32_t *min_layer, uint32_t *max_layer,
3660 bool *mirror_z)
3661 {
3662 if (offsets[1].z >= offsets[0].z) {
3663 *mirror_z = false;
3664 *min_layer = offsets[0].z;
3665 *max_layer = offsets[1].z;
3666 } else {
3667 *mirror_z = true;
3668 *min_layer = offsets[1].z;
3669 *max_layer = offsets[0].z;
3670 }
3671 }
3672
3673 static VkResult
3674 create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
3675 {
3676 /* If this is not the first pool we create for this command buffer,
3677 * size it based on the size of the currently exhausted pool.
3678 */
3679 uint32_t descriptor_count = 64;
3680 if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
3681 struct v3dv_descriptor_pool *exhausted_pool =
3682 v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
3683 descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
3684 }
3685
3686 /* Create the descriptor pool */
3687 cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
3688 VkDescriptorPoolSize pool_size = {
3689 .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
3690 .descriptorCount = descriptor_count,
3691 };
3692 VkDescriptorPoolCreateInfo info = {
3693 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
3694 .maxSets = descriptor_count,
3695 .poolSizeCount = 1,
3696 .pPoolSizes = &pool_size,
3697 .flags = 0,
3698 };
3699 VkResult result =
3700 v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
3701 &info,
3702 &cmd_buffer->device->vk.alloc,
3703 &cmd_buffer->meta.blit.dspool);
3704
3705 if (result == VK_SUCCESS) {
3706 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
3707 const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;
3708
3709 v3dv_cmd_buffer_add_private_obj(
3710 cmd_buffer, (uintptr_t) _pool,
3711 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
3712
3713 struct v3dv_descriptor_pool *pool =
3714 v3dv_descriptor_pool_from_handle(_pool);
3715 pool->is_driver_internal = true;
3716 }
3717
3718 return result;
3719 }
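/* With the doubling policy above, successive pools for a command buffer
 * hold 64, 128, 256, 512 and then 1024 descriptors (the cap), so even a
 * command buffer that records thousands of blits allocates only a small
 * number of pools.
 */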
3720
3721 static VkResult
3722 allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
3723 VkDescriptorSet *set)
3724 {
3725 /* Make sure we have a descriptor pool */
3726 VkResult result;
3727 if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
3728 result = create_blit_descriptor_pool(cmd_buffer);
3729 if (result != VK_SUCCESS)
3730 return result;
3731 }
3732 assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
3733
3734 /* Allocate descriptor set */
3735 struct v3dv_device *device = cmd_buffer->device;
3736 VkDevice _device = v3dv_device_to_handle(device);
3737 VkDescriptorSetAllocateInfo info = {
3738 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
3739 .descriptorPool = cmd_buffer->meta.blit.dspool,
3740 .descriptorSetCount = 1,
3741 .pSetLayouts = &device->meta.blit.ds_layout,
3742 };
3743 result = v3dv_AllocateDescriptorSets(_device, &info, set);
3744
3745 /* If we ran out of pool space, grow the pool and try again */
3746 if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
3747 result = create_blit_descriptor_pool(cmd_buffer);
3748 if (result == VK_SUCCESS) {
3749 info.descriptorPool = cmd_buffer->meta.blit.dspool;
3750 result = v3dv_AllocateDescriptorSets(_device, &info, set);
3751 }
3752 }
3753
3754 return result;
3755 }
3756
3757 /**
3758 * Returns true if the implementation supports the requested operation (even if
3759 * it failed to process it, for example, due to an out-of-memory error).
3760 *
3761 * The caller can specify the channels on the destination to be written via the
3762 * cmask parameter (which can be 0 to default to all channels), as well as a
3763 * swizzle to apply to the source via the cswizzle parameter (which can be NULL
3764 * to use the default identity swizzle).
3765 */
3766 static bool
3767 blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
3768 struct v3dv_image *dst,
3769 VkFormat dst_format,
3770 struct v3dv_image *src,
3771 VkFormat src_format,
3772 VkColorComponentFlags cmask,
3773 VkComponentMapping *cswizzle,
3774 const VkImageBlit2KHR *_region,
3775 VkFilter filter,
3776 bool dst_is_padded_image)
3777 {
3778 bool handled = true;
3779 VkResult result;
3780 uint32_t dirty_dynamic_state = 0;
3781
3782 /* We don't support rendering to linear depth/stencil, this should have
3783 * been rewritten to a compatible color blit by the caller.
3784 */
3785 assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
3786 !vk_format_is_depth_or_stencil(dst_format));
3787
3788 /* Can't sample from linear images, except 1D images, which are always linear */
3789 if (src->vk.tiling == VK_IMAGE_TILING_LINEAR && src->vk.image_type != VK_IMAGE_TYPE_1D)
3790 return false;
3791
3792 VkImageBlit2KHR region = *_region;
3793 /* Rewrite combined D/S blits to compatible color blits */
3794 if (vk_format_is_depth_or_stencil(dst_format)) {
3795 assert(src_format == dst_format);
3796 assert(cmask == 0);
3797 switch(dst_format) {
3798 case VK_FORMAT_D16_UNORM:
3799 dst_format = VK_FORMAT_R16_UINT;
3800 break;
3801 case VK_FORMAT_D32_SFLOAT:
3802 dst_format = VK_FORMAT_R32_UINT;
3803 break;
3804 case VK_FORMAT_X8_D24_UNORM_PACK32:
3805 case VK_FORMAT_D24_UNORM_S8_UINT:
3806 if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
3807 cmask |= VK_COLOR_COMPONENT_G_BIT |
3808 VK_COLOR_COMPONENT_B_BIT |
3809 VK_COLOR_COMPONENT_A_BIT;
3810 }
3811 if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
3812 assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
3813 cmask |= VK_COLOR_COMPONENT_R_BIT;
3814 }
3815 dst_format = VK_FORMAT_R8G8B8A8_UINT;
3816 break;
3817 default:
3818 unreachable("Unsupported depth/stencil format");
3819 }
3820 src_format = dst_format;
3821 region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
3822 region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
3823 }
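   /* For example, a depth-only blit of VK_FORMAT_D24_UNORM_S8_UINT becomes
    * a VK_FORMAT_R8G8B8A8_UINT color blit that writes only the G/B/A
    * channels holding the 24 depth bits, leaving the stencil byte in R
    * untouched; a stencil-only blit writes only R.
    */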
3824
3825 const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
3826 VK_COLOR_COMPONENT_G_BIT |
3827 VK_COLOR_COMPONENT_B_BIT |
3828 VK_COLOR_COMPONENT_A_BIT;
3829 if (cmask == 0)
3830 cmask = full_cmask;
3831
3832 VkComponentMapping ident_swizzle = {
3833 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
3834 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
3835 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
3836 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
3837 };
3838 if (!cswizzle)
3839 cswizzle = &ident_swizzle;
3840
3841 /* When we get here from a copy between compressed / uncompressed images
3842 * we choose to specify the destination blit region based on the size
3843 * semantics of the source image of the copy (see copy_image_blit), so we
3844 * need to apply those same semantics here when we compute the size of the
3845 * destination image level.
3846 */
3847 const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
3848 const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
3849 const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
3850 const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
3851 const uint32_t dst_level_w =
3852 u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
3853 region.dstSubresource.mipLevel);
3854 const uint32_t dst_level_h =
3855 u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
3856 region.dstSubresource.mipLevel);
3857
3858 const uint32_t src_level_w =
3859 u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
3860 const uint32_t src_level_h =
3861 u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
3862 const uint32_t src_level_d =
3863 u_minify(src->vk.extent.depth, region.srcSubresource.mipLevel);
3864
3865 uint32_t dst_x, dst_y, dst_w, dst_h;
3866 bool dst_mirror_x, dst_mirror_y;
3867 compute_blit_box(region.dstOffsets,
3868 dst_level_w, dst_level_h,
3869 &dst_x, &dst_y, &dst_w, &dst_h,
3870 &dst_mirror_x, &dst_mirror_y);
3871
3872 uint32_t src_x, src_y, src_w, src_h;
3873 bool src_mirror_x, src_mirror_y;
3874 compute_blit_box(region.srcOffsets,
3875 src_level_w, src_level_h,
3876 &src_x, &src_y, &src_w, &src_h,
3877 &src_mirror_x, &src_mirror_y);
3878
3879 uint32_t min_dst_layer;
3880 uint32_t max_dst_layer;
3881 bool dst_mirror_z = false;
3882 if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
3883 min_dst_layer = region.dstSubresource.baseArrayLayer;
3884 max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
3885 } else {
3886 compute_blit_3d_layers(region.dstOffsets,
3887 &min_dst_layer, &max_dst_layer,
3888 &dst_mirror_z);
3889 }
3890
3891 uint32_t min_src_layer;
3892 uint32_t max_src_layer;
3893 bool src_mirror_z = false;
3894 if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
3895 min_src_layer = region.srcSubresource.baseArrayLayer;
3896 max_src_layer = min_src_layer + region.srcSubresource.layerCount;
3897 } else {
3898 compute_blit_3d_layers(region.srcOffsets,
3899 &min_src_layer, &max_src_layer,
3900 &src_mirror_z);
3901 }
3902
3903 uint32_t layer_count = max_dst_layer - min_dst_layer;
3904
3905 /* Translate source blit coordinates to normalized texture coordinates for
3906 * single sampled textures. For multisampled textures we require
3907 * unnormalized coordinates, since we can only do texelFetch on them.
3908 */
3909 float coords[4] = {
3910 (float)src_x,
3911 (float)src_y,
3912 (float)(src_x + src_w),
3913 (float)(src_y + src_h),
3914 };
3915
3916 if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
3917 coords[0] /= (float)src_level_w;
3918 coords[1] /= (float)src_level_h;
3919 coords[2] /= (float)src_level_w;
3920 coords[3] /= (float)src_level_h;
3921 }
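   /* Example (values assumed for illustration): src_x = 8, src_w = 16 on a
    * 64-pixel-wide single-sampled level yields normalized coordinates
    * 8/64 = 0.125 and (8 + 16)/64 = 0.375 for the left and right edges.
    */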
3922
3923 /* Handle mirroring */
3924 const bool mirror_x = dst_mirror_x != src_mirror_x;
3925 const bool mirror_y = dst_mirror_y != src_mirror_y;
3926 const bool mirror_z = dst_mirror_z != src_mirror_z;
3927 float tex_coords[5] = {
3928 !mirror_x ? coords[0] : coords[2],
3929 !mirror_y ? coords[1] : coords[3],
3930 !mirror_x ? coords[2] : coords[0],
3931 !mirror_y ? coords[3] : coords[1],
3932 /* Z coordinate for 3D blit sources, to be filled for each
3933 * destination layer
3934 */
3935 0.0f
3936 };
3937
3938 /* For blits from 3D images we also need to compute the slice coordinate to
3939 * sample from, which will change for each layer in the destination.
3940 * Compute the step we should increase for each iteration.
3941 */
3942 const float src_z_step =
3943 (float)(max_src_layer - min_src_layer) / (float)layer_count;
3944
3945 /* Get the blit pipeline */
3946 struct v3dv_meta_blit_pipeline *pipeline = NULL;
3947 bool ok = get_blit_pipeline(cmd_buffer->device,
3948 dst_format, src_format, cmask, src->vk.image_type,
3949 dst->vk.samples, src->vk.samples,
3950 &pipeline);
3951 if (!ok)
3952 return handled;
3953 assert(pipeline && pipeline->pipeline &&
3954 pipeline->pass && pipeline->pass_no_load);
3955
3956 struct v3dv_device *device = cmd_buffer->device;
3957 assert(device->meta.blit.ds_layout);
3958
3959 VkDevice _device = v3dv_device_to_handle(device);
3960 VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
3961
3962 /* Create sampler for blit source image */
3963 VkSamplerCreateInfo sampler_info = {
3964 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
3965 .magFilter = filter,
3966 .minFilter = filter,
3967 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
3968 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
3969 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
3970 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
3971 };
3972 VkSampler sampler;
3973 result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
3974 &sampler);
3975 if (result != VK_SUCCESS)
3976 goto fail;
3977
3978 v3dv_cmd_buffer_add_private_obj(
3979 cmd_buffer, (uintptr_t)sampler,
3980 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);
3981
3982 /* Push command buffer state before starting meta operation */
3983 v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
3984
3985 /* Push state that is common for all layers */
3986 v3dv_CmdBindPipeline(_cmd_buffer,
3987 VK_PIPELINE_BIND_POINT_GRAPHICS,
3988 pipeline->pipeline);
3989
3990 const VkViewport viewport = {
3991 .x = dst_x,
3992 .y = dst_y,
3993 .width = dst_w,
3994 .height = dst_h,
3995 .minDepth = 0.0f,
3996 .maxDepth = 1.0f
3997 };
3998 v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
3999
4000 const VkRect2D scissor = {
4001 .offset = { dst_x, dst_y },
4002 .extent = { dst_w, dst_h }
4003 };
4004 v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
4005
4006 bool can_skip_tlb_load = false;
4007 const VkRect2D render_area = {
4008 .offset = { dst_x, dst_y },
4009 .extent = { dst_w, dst_h },
4010 };
4011
4012 /* Record per-layer commands */
4013 VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
4014 for (uint32_t i = 0; i < layer_count; i++) {
4015 /* Setup framebuffer */
4016 VkImageViewCreateInfo dst_image_view_info = {
4017 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4018 .image = v3dv_image_to_handle(dst),
4019 .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
4020 .format = dst_format,
4021 .subresourceRange = {
4022 .aspectMask = aspects,
4023 .baseMipLevel = region.dstSubresource.mipLevel,
4024 .levelCount = 1,
4025 .baseArrayLayer = min_dst_layer + i,
4026 .layerCount = 1
4027 },
4028 };
4029 VkImageView dst_image_view;
4030 result = v3dv_CreateImageView(_device, &dst_image_view_info,
4031 &device->vk.alloc, &dst_image_view);
4032 if (result != VK_SUCCESS)
4033 goto fail;
4034
4035 v3dv_cmd_buffer_add_private_obj(
4036 cmd_buffer, (uintptr_t)dst_image_view,
4037 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4038
4039 VkFramebufferCreateInfo fb_info = {
4040 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
4041 .renderPass = pipeline->pass,
4042 .attachmentCount = 1,
4043 .pAttachments = &dst_image_view,
4044 .width = dst_x + dst_w,
4045 .height = dst_y + dst_h,
4046 .layers = 1,
4047 };
4048
4049 VkFramebuffer fb;
4050 result = v3dv_CreateFramebuffer(_device, &fb_info,
4051 &cmd_buffer->device->vk.alloc, &fb);
4052 if (result != VK_SUCCESS)
4053 goto fail;
4054
4055 struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
4056 framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
4057 fb_info.height == dst_level_h &&
4058 dst_is_padded_image;
4059
4060 v3dv_cmd_buffer_add_private_obj(
4061 cmd_buffer, (uintptr_t)fb,
4062 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
4063
4064 /* Setup descriptor set for blit source texture. We don't have to
4065 * register the descriptor as a private command buffer object since
4066 * all descriptors will be freed automatically with the descriptor
4067 * pool.
4068 */
4069 VkDescriptorSet set;
4070 result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
4071 if (result != VK_SUCCESS)
4072 goto fail;
4073
4074 VkImageViewCreateInfo src_image_view_info = {
4075 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
4076 .image = v3dv_image_to_handle(src),
4077 .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
4078 .format = src_format,
4079 .components = *cswizzle,
4080 .subresourceRange = {
4081 .aspectMask = aspects,
4082 .baseMipLevel = region.srcSubresource.mipLevel,
4083 .levelCount = 1,
4084 .baseArrayLayer =
4085 src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
4086 .layerCount = 1
4087 },
4088 };
4089 VkImageView src_image_view;
4090 result = v3dv_CreateImageView(_device, &src_image_view_info,
4091 &device->vk.alloc, &src_image_view);
4092 if (result != VK_SUCCESS)
4093 goto fail;
4094
4095 v3dv_cmd_buffer_add_private_obj(
4096 cmd_buffer, (uintptr_t)src_image_view,
4097 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
4098
4099 VkDescriptorImageInfo image_info = {
4100 .sampler = sampler,
4101 .imageView = src_image_view,
4102 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
4103 };
4104 VkWriteDescriptorSet write = {
4105 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
4106 .dstSet = set,
4107 .dstBinding = 0,
4108 .dstArrayElement = 0,
4109 .descriptorCount = 1,
4110 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
4111 .pImageInfo = &image_info,
4112 };
4113 v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
4114
4115 v3dv_CmdBindDescriptorSets(_cmd_buffer,
4116 VK_PIPELINE_BIND_POINT_GRAPHICS,
4117 device->meta.blit.p_layout,
4118 0, 1, &set,
4119 0, NULL);
4120
4121 /* If the region we are about to blit is tile-aligned, then we can
4122 * use the render pass version that won't pre-load the tile buffer
4123 * with the dst image contents before the blit. The exception is when we
4124 * don't have a full color mask, since in that case we need to preserve
4125 * the original value of some of the color components.
4126 *
4127 * Since all layers have the same area, we only need to compute this for
4128 * the first.
4129 */
4130 if (i == 0) {
4131 struct v3dv_render_pass *pipeline_pass =
4132 v3dv_render_pass_from_handle(pipeline->pass);
4133 can_skip_tlb_load =
4134 cmask == full_cmask &&
4135 v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
4136 framebuffer, pipeline_pass, 0);
4137 }
4138
4139 /* Record blit */
4140 VkRenderPassBeginInfo rp_info = {
4141 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
4142 .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
4143 pipeline->pass,
4144 .framebuffer = fb,
4145 .renderArea = render_area,
4146 .clearValueCount = 0,
4147 };
4148
4149 v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
4150 struct v3dv_job *job = cmd_buffer->state.job;
4151 if (!job)
4152 goto fail;
4153
4154 /* For 3D blits we need to compute the source slice to blit from (the Z
4155 * coordinate of the source sample operation). We want to choose this
4156 * based on the ratio of the depth of the source and the destination
4157 * images, picking the coordinate in the middle of each step.
4158 */
4159 if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
4160 tex_coords[4] =
4161 !mirror_z ?
4162 (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
4163 (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
4164 }
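      /* Example (sizes assumed for illustration): an 8-slice 3D source
       * blitted to 4 destination layers gives src_z_step = 2, so layers
       * i = 0..3 sample at Z = (2*i + 1) / 8 = 0.125, 0.375, 0.625, 0.875,
       * the center of each pair of source slices.
       */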
4165
4166 v3dv_CmdPushConstants(_cmd_buffer,
4167 device->meta.blit.p_layout,
4168 VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
4169 &tex_coords);
4170
4171 v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
4172
4173 v3dv_CmdEndRenderPass(_cmd_buffer);
4174 dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
4175 }
4176
4177 fail:
4178 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
4179
4180 return handled;
4181 }
4182
4183 VKAPI_ATTR void VKAPI_CALL
4184 v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
4185 const VkBlitImageInfo2KHR *pBlitImageInfo)
4186 {
4187 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4188 V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
4189 V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
4190
4191 /* This command can only happen outside a render pass */
4192 assert(cmd_buffer->state.pass == NULL);
4193 assert(cmd_buffer->state.job == NULL);
4194
4195 /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
4196 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
4197 src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4198
4199 /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
4200 assert(!vk_format_is_compressed(dst->vk.format));
4201
4202 for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
4203 if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
4204 continue;
4205 if (blit_shader(cmd_buffer,
4206 dst, dst->vk.format,
4207 src, src->vk.format,
4208 0, NULL,
4209 &pBlitImageInfo->pRegions[i],
4210 pBlitImageInfo->filter, true)) {
4211 continue;
4212 }
4213 unreachable("Unsupported blit operation");
4214 }
4215 }
4216
4217 static bool
4218 resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
4219 struct v3dv_image *dst,
4220 struct v3dv_image *src,
4221 const VkImageResolve2KHR *region)
4222 {
4223 if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) ||
4224 !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) {
4225 return false;
4226 }
4227
4228 if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
4229 return false;
4230
4231 const VkFormat fb_format = src->vk.format;
4232
4233 uint32_t num_layers;
4234 if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
4235 num_layers = region->dstSubresource.layerCount;
4236 else
4237 num_layers = region->extent.depth;
4238 assert(num_layers > 0);
4239
4240 struct v3dv_job *job =
4241 v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
4242 if (!job)
4243 return true;
4244
4245 const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
4246 const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
4247 const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
4248 const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
4249
4250 uint32_t internal_type, internal_bpp;
4251 v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
4252 (fb_format, region->srcSubresource.aspectMask,
4253 &internal_type, &internal_bpp);
4254
4255 v3dv_job_start_frame(job, width, height, num_layers, false,
4256 1, internal_bpp, true);
4257
4258 struct v3dv_meta_framebuffer framebuffer;
4259 v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
4260 internal_type, &job->frame_tiling);
4261
4262 v3dv_X(job->device, job_emit_binning_flush)(job);
4263 v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
4264 &framebuffer, region);
4265
4266 v3dv_cmd_buffer_finish_job(cmd_buffer);
4267 return true;
4268 }
4269
4270 static bool
4271 resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
4272 struct v3dv_image *dst,
4273 struct v3dv_image *src,
4274 const VkImageResolve2KHR *region)
4275 {
4276 const VkImageBlit2KHR blit_region = {
4277 .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
4278 .srcSubresource = region->srcSubresource,
4279 .srcOffsets = {
4280 region->srcOffset,
4281 {
4282 region->srcOffset.x + region->extent.width,
4283 region->srcOffset.y + region->extent.height,
4284 }
4285 },
4286 .dstSubresource = region->dstSubresource,
4287 .dstOffsets = {
4288 region->dstOffset,
4289 {
4290 region->dstOffset.x + region->extent.width,
4291 region->dstOffset.y + region->extent.height,
4292 }
4293 },
4294 };
4295 return blit_shader(cmd_buffer,
4296 dst, dst->vk.format,
4297 src, src->vk.format,
4298 0, NULL,
4299 &blit_region, VK_FILTER_NEAREST, true);
4300 }
4301
4302 VKAPI_ATTR void VKAPI_CALL
4303 v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
4304 const VkResolveImageInfo2KHR *info)
4305
4306 {
4307 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
4308 V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
4309 V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
4310
4311 /* This command can only happen outside a render pass */
4312 assert(cmd_buffer->state.pass == NULL);
4313 assert(cmd_buffer->state.job == NULL);
4314
4315 assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
4316 assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
4317
4318 for (uint32_t i = 0; i < info->regionCount; i++) {
4319 if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
4320 continue;
4321 if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
4322 continue;
4323 unreachable("Unsupported multismaple resolve operation");
4324 }
4325 }
4326