1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 static bool
lookup_blorp_shader(struct blorp_batch * batch,const void * key,uint32_t key_size,uint32_t * kernel_out,void * prog_data_out)27 lookup_blorp_shader(struct blorp_batch *batch,
28 const void *key, uint32_t key_size,
29 uint32_t *kernel_out, void *prog_data_out)
30 {
31 struct blorp_context *blorp = batch->blorp;
32 struct anv_device *device = blorp->driver_ctx;
33
34 /* The default cache must be a real cache */
35 assert(device->default_pipeline_cache.cache);
36
37 struct anv_shader_bin *bin =
38 anv_pipeline_cache_search(&device->default_pipeline_cache, key, key_size);
39 if (!bin)
40 return false;
41
42 /* The cache already has a reference and it's not going anywhere so there
43 * is no need to hold a second reference.
44 */
45 anv_shader_bin_unref(device, bin);
46
47 *kernel_out = bin->kernel.offset;
48 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
49
50 return true;
51 }
52
53 static bool
upload_blorp_shader(struct blorp_batch * batch,uint32_t stage,const void * key,uint32_t key_size,const void * kernel,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data,uint32_t prog_data_size,uint32_t * kernel_out,void * prog_data_out)54 upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
55 const void *key, uint32_t key_size,
56 const void *kernel, uint32_t kernel_size,
57 const struct brw_stage_prog_data *prog_data,
58 uint32_t prog_data_size,
59 uint32_t *kernel_out, void *prog_data_out)
60 {
61 struct blorp_context *blorp = batch->blorp;
62 struct anv_device *device = blorp->driver_ctx;
63
64 /* The blorp cache must be a real cache */
65 assert(device->default_pipeline_cache.cache);
66
67 struct anv_pipeline_bind_map bind_map = {
68 .surface_count = 0,
69 .sampler_count = 0,
70 };
71
72 struct anv_shader_bin *bin =
73 anv_pipeline_cache_upload_kernel(&device->default_pipeline_cache, stage,
74 key, key_size, kernel, kernel_size,
75 prog_data, prog_data_size,
76 NULL, 0, NULL, &bind_map);
77
78 if (!bin)
79 return false;
80
81 /* The cache already has a reference and it's not going anywhere so there
82 * is no need to hold a second reference.
83 */
84 anv_shader_bin_unref(device, bin);
85
86 *kernel_out = bin->kernel.offset;
87 *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
88
89 return true;
90 }
91
92 void
anv_device_init_blorp(struct anv_device * device)93 anv_device_init_blorp(struct anv_device *device)
94 {
95 blorp_init(&device->blorp, device, &device->isl_dev);
96 device->blorp.compiler = device->physical->compiler;
97 device->blorp.lookup_shader = lookup_blorp_shader;
98 device->blorp.upload_shader = upload_blorp_shader;
99 switch (device->info.verx10) {
100 case 70:
101 device->blorp.exec = gfx7_blorp_exec;
102 break;
103 case 75:
104 device->blorp.exec = gfx75_blorp_exec;
105 break;
106 case 80:
107 device->blorp.exec = gfx8_blorp_exec;
108 break;
109 case 90:
110 device->blorp.exec = gfx9_blorp_exec;
111 break;
112 case 110:
113 device->blorp.exec = gfx11_blorp_exec;
114 break;
115 case 120:
116 device->blorp.exec = gfx12_blorp_exec;
117 break;
118 case 125:
119 device->blorp.exec = gfx125_blorp_exec;
120 break;
121 default:
122 unreachable("Unknown hardware generation");
123 }
124 }
125
126 void
anv_device_finish_blorp(struct anv_device * device)127 anv_device_finish_blorp(struct anv_device *device)
128 {
129 blorp_finish(&device->blorp);
130 }
131
132 static void
anv_blorp_batch_init(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,enum blorp_batch_flags flags)133 anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
134 struct blorp_batch *batch, enum blorp_batch_flags flags)
135 {
136 if (!(cmd_buffer->pool->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
137 assert(cmd_buffer->pool->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
138 flags |= BLORP_BATCH_USE_COMPUTE;
139 }
140
141 blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
142 }
143
/* Thin wrapper kept for symmetry with anv_blorp_batch_init(). */
static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}
149
150 static void
get_blorp_surf_for_anv_buffer(struct anv_device * device,struct anv_buffer * buffer,uint64_t offset,uint32_t width,uint32_t height,uint32_t row_pitch,enum isl_format format,bool is_dest,struct blorp_surf * blorp_surf,struct isl_surf * isl_surf)151 get_blorp_surf_for_anv_buffer(struct anv_device *device,
152 struct anv_buffer *buffer, uint64_t offset,
153 uint32_t width, uint32_t height,
154 uint32_t row_pitch, enum isl_format format,
155 bool is_dest,
156 struct blorp_surf *blorp_surf,
157 struct isl_surf *isl_surf)
158 {
159 const struct isl_format_layout *fmtl =
160 isl_format_get_layout(format);
161 bool ok UNUSED;
162
163 /* ASTC is the only format which doesn't support linear layouts.
164 * Create an equivalently sized surface with ISL to get around this.
165 */
166 if (fmtl->txc == ISL_TXC_ASTC) {
167 /* Use an equivalently sized format */
168 format = ISL_FORMAT_R32G32B32A32_UINT;
169 assert(fmtl->bpb == isl_format_get_layout(format)->bpb);
170
171 /* Shrink the dimensions for the new format */
172 width = DIV_ROUND_UP(width, fmtl->bw);
173 height = DIV_ROUND_UP(height, fmtl->bh);
174 }
175
176 *blorp_surf = (struct blorp_surf) {
177 .surf = isl_surf,
178 .addr = {
179 .buffer = buffer->address.bo,
180 .offset = buffer->address.offset + offset,
181 .mocs = anv_mocs(device, buffer->address.bo,
182 is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
183 : ISL_SURF_USAGE_TEXTURE_BIT),
184 },
185 };
186
187 ok = isl_surf_init(&device->isl_dev, isl_surf,
188 .dim = ISL_SURF_DIM_2D,
189 .format = format,
190 .width = width,
191 .height = height,
192 .depth = 1,
193 .levels = 1,
194 .array_len = 1,
195 .samples = 1,
196 .row_pitch_B = row_pitch,
197 .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
198 : ISL_SURF_USAGE_TEXTURE_BIT,
199 .tiling_flags = ISL_TILING_LINEAR_BIT);
200 assert(ok);
201 }
202
203 /* Pick something high enough that it won't be used in core and low enough it
204 * will never map to an extension.
205 */
206 #define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000
207
208 static struct blorp_address
anv_to_blorp_address(struct anv_address addr)209 anv_to_blorp_address(struct anv_address addr)
210 {
211 return (struct blorp_address) {
212 .buffer = addr.bo,
213 .offset = addr.offset,
214 };
215 }
216
217 static void
get_blorp_surf_for_anv_image(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlags aspect,VkImageUsageFlags usage,VkImageLayout layout,enum isl_aux_usage aux_usage,struct blorp_surf * blorp_surf)218 get_blorp_surf_for_anv_image(const struct anv_device *device,
219 const struct anv_image *image,
220 VkImageAspectFlags aspect,
221 VkImageUsageFlags usage,
222 VkImageLayout layout,
223 enum isl_aux_usage aux_usage,
224 struct blorp_surf *blorp_surf)
225 {
226 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
227
228 if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
229 assert(usage != 0);
230 aux_usage = anv_layout_to_aux_usage(&device->info, image,
231 aspect, usage, layout);
232 }
233
234 isl_surf_usage_flags_t mocs_usage =
235 (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
236 ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;
237
238 const struct anv_surface *surface = &image->planes[plane].primary_surface;
239 const struct anv_address address =
240 anv_image_address(image, &surface->memory_range);
241
242 *blorp_surf = (struct blorp_surf) {
243 .surf = &surface->isl,
244 .addr = {
245 .buffer = address.bo,
246 .offset = address.offset,
247 .mocs = anv_mocs(device, address.bo, mocs_usage),
248 },
249 };
250
251 if (aux_usage != ISL_AUX_USAGE_NONE) {
252 const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
253 const struct anv_address aux_address =
254 anv_image_address(image, &aux_surface->memory_range);
255
256 blorp_surf->aux_usage = aux_usage;
257 blorp_surf->aux_surf = &aux_surface->isl;
258
259 if (!anv_address_is_null(aux_address)) {
260 blorp_surf->aux_addr = (struct blorp_address) {
261 .buffer = aux_address.bo,
262 .offset = aux_address.offset,
263 .mocs = anv_mocs(device, aux_address.bo, 0),
264 };
265 }
266
267 /* If we're doing a partial resolve, then we need the indirect clear
268 * color. If we are doing a fast clear and want to store/update the
269 * clear color, we also pass the address to blorp, otherwise it will only
270 * stomp the CCS to a particular value and won't care about format or
271 * clear value
272 */
273 if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
274 const struct anv_address clear_color_addr =
275 anv_image_get_clear_color_addr(device, image, aspect);
276 blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
277 } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
278 const struct anv_address clear_color_addr =
279 anv_image_get_clear_color_addr(device, image, aspect);
280 blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
281 blorp_surf->clear_color = (union isl_color_value) {
282 .f32 = { ANV_HZ_FC_VAL },
283 };
284 }
285 }
286 }
287
288 static bool
get_blorp_surf_for_anv_shadow_image(const struct anv_device * device,const struct anv_image * image,VkImageAspectFlags aspect,struct blorp_surf * blorp_surf)289 get_blorp_surf_for_anv_shadow_image(const struct anv_device *device,
290 const struct anv_image *image,
291 VkImageAspectFlags aspect,
292 struct blorp_surf *blorp_surf)
293 {
294
295 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
296 if (!anv_surface_is_valid(&image->planes[plane].shadow_surface))
297 return false;
298
299 const struct anv_surface *surface = &image->planes[plane].shadow_surface;
300 const struct anv_address address =
301 anv_image_address(image, &surface->memory_range);
302
303 *blorp_surf = (struct blorp_surf) {
304 .surf = &surface->isl,
305 .addr = {
306 .buffer = address.bo,
307 .offset = address.offset,
308 .mocs = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT),
309 },
310 };
311
312 return true;
313 }
314
315 static void
copy_image(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,struct anv_image * src_image,VkImageLayout src_image_layout,struct anv_image * dst_image,VkImageLayout dst_image_layout,const VkImageCopy2KHR * region)316 copy_image(struct anv_cmd_buffer *cmd_buffer,
317 struct blorp_batch *batch,
318 struct anv_image *src_image,
319 VkImageLayout src_image_layout,
320 struct anv_image *dst_image,
321 VkImageLayout dst_image_layout,
322 const VkImageCopy2KHR *region)
323 {
324 VkOffset3D srcOffset =
325 anv_sanitize_image_offset(src_image->vk.image_type, region->srcOffset);
326 VkOffset3D dstOffset =
327 anv_sanitize_image_offset(dst_image->vk.image_type, region->dstOffset);
328 VkExtent3D extent =
329 anv_sanitize_image_extent(src_image->vk.image_type, region->extent);
330
331 const uint32_t dst_level = region->dstSubresource.mipLevel;
332 unsigned dst_base_layer, layer_count;
333 if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
334 dst_base_layer = region->dstOffset.z;
335 layer_count = region->extent.depth;
336 } else {
337 dst_base_layer = region->dstSubresource.baseArrayLayer;
338 layer_count = vk_image_subresource_layer_count(&dst_image->vk,
339 ®ion->dstSubresource);
340 }
341
342 const uint32_t src_level = region->srcSubresource.mipLevel;
343 unsigned src_base_layer;
344 if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
345 src_base_layer = region->srcOffset.z;
346 } else {
347 src_base_layer = region->srcSubresource.baseArrayLayer;
348 assert(layer_count ==
349 vk_image_subresource_layer_count(&src_image->vk,
350 ®ion->srcSubresource));
351 }
352
353 VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
354 dst_mask = region->dstSubresource.aspectMask;
355
356 assert(anv_image_aspects_compatible(src_mask, dst_mask));
357
358 if (util_bitcount(src_mask) > 1) {
359 anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
360 struct blorp_surf src_surf, dst_surf;
361 get_blorp_surf_for_anv_image(cmd_buffer->device,
362 src_image, 1UL << aspect_bit,
363 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
364 src_image_layout, ISL_AUX_USAGE_NONE,
365 &src_surf);
366 get_blorp_surf_for_anv_image(cmd_buffer->device,
367 dst_image, 1UL << aspect_bit,
368 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
369 dst_image_layout, ISL_AUX_USAGE_NONE,
370 &dst_surf);
371 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
372 1UL << aspect_bit,
373 dst_surf.aux_usage, dst_level,
374 dst_base_layer, layer_count);
375
376 for (unsigned i = 0; i < layer_count; i++) {
377 blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
378 &dst_surf, dst_level, dst_base_layer + i,
379 srcOffset.x, srcOffset.y,
380 dstOffset.x, dstOffset.y,
381 extent.width, extent.height);
382 }
383
384 struct blorp_surf dst_shadow_surf;
385 if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
386 dst_image,
387 1UL << aspect_bit,
388 &dst_shadow_surf)) {
389 for (unsigned i = 0; i < layer_count; i++) {
390 blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
391 &dst_shadow_surf, dst_level, dst_base_layer + i,
392 srcOffset.x, srcOffset.y,
393 dstOffset.x, dstOffset.y,
394 extent.width, extent.height);
395 }
396 }
397 }
398 } else {
399 struct blorp_surf src_surf, dst_surf;
400 get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
401 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
402 src_image_layout, ISL_AUX_USAGE_NONE,
403 &src_surf);
404 get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
405 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
406 dst_image_layout, ISL_AUX_USAGE_NONE,
407 &dst_surf);
408 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
409 dst_surf.aux_usage, dst_level,
410 dst_base_layer, layer_count);
411
412 for (unsigned i = 0; i < layer_count; i++) {
413 blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
414 &dst_surf, dst_level, dst_base_layer + i,
415 srcOffset.x, srcOffset.y,
416 dstOffset.x, dstOffset.y,
417 extent.width, extent.height);
418 }
419
420 struct blorp_surf dst_shadow_surf;
421 if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
422 dst_image, dst_mask,
423 &dst_shadow_surf)) {
424 for (unsigned i = 0; i < layer_count; i++) {
425 blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
426 &dst_shadow_surf, dst_level, dst_base_layer + i,
427 srcOffset.x, srcOffset.y,
428 dstOffset.x, dstOffset.y,
429 extent.width, extent.height);
430 }
431 }
432 }
433 }
434
/* vkCmdCopyImage2KHR: each region is an independent blorp copy. */
void anv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                          const VkCopyImageInfo2KHR *pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);
}
455
456 static enum isl_format
isl_format_for_size(unsigned size_B)457 isl_format_for_size(unsigned size_B)
458 {
459 /* Prefer 32-bit per component formats for CmdFillBuffer */
460 switch (size_B) {
461 case 1: return ISL_FORMAT_R8_UINT;
462 case 2: return ISL_FORMAT_R16_UINT;
463 case 3: return ISL_FORMAT_R8G8B8_UINT;
464 case 4: return ISL_FORMAT_R32_UINT;
465 case 6: return ISL_FORMAT_R16G16B16_UINT;
466 case 8: return ISL_FORMAT_R32G32_UINT;
467 case 12: return ISL_FORMAT_R32G32B32_UINT;
468 case 16: return ISL_FORMAT_R32G32B32A32_UINT;
469 default:
470 unreachable("Unknown format size");
471 }
472 }
473
474 static void
copy_buffer_to_image(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,struct anv_buffer * anv_buffer,struct anv_image * anv_image,VkImageLayout image_layout,const VkBufferImageCopy2KHR * region,bool buffer_to_image)475 copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
476 struct blorp_batch *batch,
477 struct anv_buffer *anv_buffer,
478 struct anv_image *anv_image,
479 VkImageLayout image_layout,
480 const VkBufferImageCopy2KHR* region,
481 bool buffer_to_image)
482 {
483 struct {
484 struct blorp_surf surf;
485 uint32_t level;
486 VkOffset3D offset;
487 } image, buffer, *src, *dst;
488
489 buffer.level = 0;
490 buffer.offset = (VkOffset3D) { 0, 0, 0 };
491
492 if (buffer_to_image) {
493 src = &buffer;
494 dst = ℑ
495 } else {
496 src = ℑ
497 dst = &buffer;
498 }
499
500 const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;
501
502 get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
503 buffer_to_image ?
504 VK_IMAGE_USAGE_TRANSFER_DST_BIT :
505 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
506 image_layout, ISL_AUX_USAGE_NONE,
507 &image.surf);
508 image.offset =
509 anv_sanitize_image_offset(anv_image->vk.image_type, region->imageOffset);
510 image.level = region->imageSubresource.mipLevel;
511
512 VkExtent3D extent =
513 anv_sanitize_image_extent(anv_image->vk.image_type, region->imageExtent);
514 if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
515 image.offset.z = region->imageSubresource.baseArrayLayer;
516 extent.depth =
517 vk_image_subresource_layer_count(&anv_image->vk,
518 ®ion->imageSubresource);
519 }
520
521 const enum isl_format linear_format =
522 anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk.format,
523 aspect, VK_IMAGE_TILING_LINEAR);
524 const struct isl_format_layout *linear_fmtl =
525 isl_format_get_layout(linear_format);
526
527 const uint32_t buffer_row_length =
528 region->bufferRowLength ?
529 region->bufferRowLength : extent.width;
530
531 const uint32_t buffer_image_height =
532 region->bufferImageHeight ?
533 region->bufferImageHeight : extent.height;
534
535 const uint32_t buffer_row_pitch =
536 DIV_ROUND_UP(buffer_row_length, linear_fmtl->bw) *
537 (linear_fmtl->bpb / 8);
538
539 const uint32_t buffer_layer_stride =
540 DIV_ROUND_UP(buffer_image_height, linear_fmtl->bh) *
541 buffer_row_pitch;
542
543 /* Some formats have additional restrictions which may cause ISL to
544 * fail to create a surface for us. Some examples include:
545 *
546 * 1. ASTC formats are not allowed to be LINEAR and must be tiled
547 * 2. YCbCr formats have to have 2-pixel aligned strides
548 *
549 * To avoid these issues, we always bind the buffer as if it's a
550 * "normal" format like RGBA32_UINT. Since we're using blorp_copy,
551 * the format doesn't matter as long as it has the right bpb.
552 */
553 const VkExtent2D buffer_extent = {
554 .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
555 .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
556 };
557 const enum isl_format buffer_format =
558 isl_format_for_size(linear_fmtl->bpb / 8);
559
560 struct isl_surf buffer_isl_surf;
561 get_blorp_surf_for_anv_buffer(cmd_buffer->device,
562 anv_buffer, region->bufferOffset,
563 buffer_extent.width, buffer_extent.height,
564 buffer_row_pitch, buffer_format, false,
565 &buffer.surf, &buffer_isl_surf);
566
567 bool dst_has_shadow = false;
568 struct blorp_surf dst_shadow_surf;
569 if (&image == dst) {
570 /* In this case, the source is the buffer and, since blorp takes its
571 * copy dimensions in terms of the source format, we have to use the
572 * scaled down version for compressed textures because the source
573 * format is an RGB format.
574 */
575 extent.width = buffer_extent.width;
576 extent.height = buffer_extent.height;
577
578 anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
579 aspect, dst->surf.aux_usage,
580 dst->level,
581 dst->offset.z, extent.depth);
582
583 dst_has_shadow =
584 get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
585 anv_image, aspect,
586 &dst_shadow_surf);
587 }
588
589 for (unsigned z = 0; z < extent.depth; z++) {
590 blorp_copy(batch, &src->surf, src->level, src->offset.z,
591 &dst->surf, dst->level, dst->offset.z,
592 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
593 extent.width, extent.height);
594
595 if (dst_has_shadow) {
596 blorp_copy(batch, &src->surf, src->level, src->offset.z,
597 &dst_shadow_surf, dst->level, dst->offset.z,
598 src->offset.x, src->offset.y,
599 dst->offset.x, dst->offset.y,
600 extent.width, extent.height);
601 }
602
603 image.offset.z++;
604 buffer.surf.addr.offset += buffer_layer_stride;
605 }
606 }
607
/* vkCmdCopyBufferToImage2KHR: each region is an independent copy with the
 * image as the destination.
 */
void anv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
                                  const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);
}
627
/* vkCmdCopyImageToBuffer2KHR: each region is an independent copy with the
 * buffer as the destination.
 */
void anv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                                  const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_blorp_batch_finish(&batch);

   /* The copies above wrote a buffer through the render pipe. */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}
649
/* Normalizes the two coordinate pairs so that *src0 <= *src1 and
 * *dst0 <= *dst1, swapping each pair in place when needed.  Returns true
 * when exactly one of the pairs was swapped, i.e. the blit must mirror
 * along this axis.
 */
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   unsigned num_swaps = 0;

   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      num_swaps++;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      num_swaps++;
   }

   /* Swapping both pairs cancels out. */
   return (num_swaps & 1) != 0;
}
670
671 static void
blit_image(struct anv_cmd_buffer * cmd_buffer,struct blorp_batch * batch,struct anv_image * src_image,VkImageLayout src_image_layout,struct anv_image * dst_image,VkImageLayout dst_image_layout,const VkImageBlit2KHR * region,VkFilter filter)672 blit_image(struct anv_cmd_buffer *cmd_buffer,
673 struct blorp_batch *batch,
674 struct anv_image *src_image,
675 VkImageLayout src_image_layout,
676 struct anv_image *dst_image,
677 VkImageLayout dst_image_layout,
678 const VkImageBlit2KHR *region,
679 VkFilter filter)
680 {
681 const VkImageSubresourceLayers *src_res = ®ion->srcSubresource;
682 const VkImageSubresourceLayers *dst_res = ®ion->dstSubresource;
683
684 struct blorp_surf src, dst;
685
686 enum blorp_filter blorp_filter;
687 switch (filter) {
688 case VK_FILTER_NEAREST:
689 blorp_filter = BLORP_FILTER_NEAREST;
690 break;
691 case VK_FILTER_LINEAR:
692 blorp_filter = BLORP_FILTER_BILINEAR;
693 break;
694 default:
695 unreachable("Invalid filter");
696 }
697
698 assert(anv_image_aspects_compatible(src_res->aspectMask,
699 dst_res->aspectMask));
700
701 anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
702 get_blorp_surf_for_anv_image(cmd_buffer->device,
703 src_image, 1U << aspect_bit,
704 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
705 src_image_layout, ISL_AUX_USAGE_NONE, &src);
706 get_blorp_surf_for_anv_image(cmd_buffer->device,
707 dst_image, 1U << aspect_bit,
708 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
709 dst_image_layout, ISL_AUX_USAGE_NONE, &dst);
710
711 struct anv_format_plane src_format =
712 anv_get_format_aspect(&cmd_buffer->device->info, src_image->vk.format,
713 1U << aspect_bit, src_image->vk.tiling);
714 struct anv_format_plane dst_format =
715 anv_get_format_aspect(&cmd_buffer->device->info, dst_image->vk.format,
716 1U << aspect_bit, dst_image->vk.tiling);
717
718 unsigned dst_start, dst_end;
719 if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
720 assert(dst_res->baseArrayLayer == 0);
721 dst_start = region->dstOffsets[0].z;
722 dst_end = region->dstOffsets[1].z;
723 } else {
724 dst_start = dst_res->baseArrayLayer;
725 dst_end = dst_start +
726 vk_image_subresource_layer_count(&dst_image->vk, dst_res);
727 }
728
729 unsigned src_start, src_end;
730 if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
731 assert(src_res->baseArrayLayer == 0);
732 src_start = region->srcOffsets[0].z;
733 src_end = region->srcOffsets[1].z;
734 } else {
735 src_start = src_res->baseArrayLayer;
736 src_end = src_start +
737 vk_image_subresource_layer_count(&src_image->vk, src_res);
738 }
739
740 bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
741 const unsigned num_layers = dst_end - dst_start;
742 float src_z_step = (float)(src_end - src_start) / (float)num_layers;
743
744 /* There is no interpolation to the pixel center during rendering, so
745 * add the 0.5 offset ourselves here. */
746 float depth_center_offset = 0;
747 if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
748 depth_center_offset = 0.5 / num_layers * (src_end - src_start);
749
750 if (flip_z) {
751 src_start = src_end;
752 src_z_step *= -1;
753 depth_center_offset *= -1;
754 }
755
756 unsigned src_x0 = region->srcOffsets[0].x;
757 unsigned src_x1 = region->srcOffsets[1].x;
758 unsigned dst_x0 = region->dstOffsets[0].x;
759 unsigned dst_x1 = region->dstOffsets[1].x;
760 bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);
761
762 unsigned src_y0 = region->srcOffsets[0].y;
763 unsigned src_y1 = region->srcOffsets[1].y;
764 unsigned dst_y0 = region->dstOffsets[0].y;
765 unsigned dst_y1 = region->dstOffsets[1].y;
766 bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);
767
768 anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
769 1U << aspect_bit,
770 dst.aux_usage,
771 dst_res->mipLevel,
772 dst_start, num_layers);
773
774 for (unsigned i = 0; i < num_layers; i++) {
775 unsigned dst_z = dst_start + i;
776 float src_z = src_start + i * src_z_step + depth_center_offset;
777
778 blorp_blit(batch, &src, src_res->mipLevel, src_z,
779 src_format.isl_format, src_format.swizzle,
780 &dst, dst_res->mipLevel, dst_z,
781 dst_format.isl_format, dst_format.swizzle,
782 src_x0, src_y0, src_x1, src_y1,
783 dst_x0, dst_y0, dst_x1, dst_y1,
784 blorp_filter, flip_x, flip_y);
785 }
786 }
787 }
788
/* vkCmdBlitImage2KHR: each region is an independent blorp blit. */
void anv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
                          const VkBlitImageInfo2KHR *pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}
809
/**
 * Returns the greatest common divisor of a and b that is a power of two.
 *
 * At least one of the two inputs must be non-zero.
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   /* ffsll(0) == 0, so a zero operand produces UINT_MAX here and the MIN
    * below simply picks the other operand's bit index.  Both operands
    * can't be zero thanks to the assert above.  __builtin_ffsll is used
    * so this doesn't depend on _GNU_SOURCE for a declaration.
    */
   unsigned a_log2 = __builtin_ffsll(a) - 1;
   unsigned b_log2 = __builtin_ffsll(b) - 1;

   /* Shift a 64-bit one: "1 << n" would be undefined behavior for
    * n >= 31 since the literal 1 is only an int, and the result must be
    * able to represent any power of two that fits in a uint64_t.
    */
   return UINT64_C(1) << (a_log2 < b_log2 ? a_log2 : b_log2);
}
827
828 /* This is maximum possible width/height our HW can handle */
829 #define MAX_SURFACE_DIM (1ull << 14)
830
831 static void
copy_buffer(struct anv_device * device,struct blorp_batch * batch,struct anv_buffer * src_buffer,struct anv_buffer * dst_buffer,const VkBufferCopy2KHR * region)832 copy_buffer(struct anv_device *device,
833 struct blorp_batch *batch,
834 struct anv_buffer *src_buffer,
835 struct anv_buffer *dst_buffer,
836 const VkBufferCopy2KHR *region)
837 {
838 struct blorp_address src = {
839 .buffer = src_buffer->address.bo,
840 .offset = src_buffer->address.offset + region->srcOffset,
841 .mocs = anv_mocs(device, src_buffer->address.bo,
842 ISL_SURF_USAGE_TEXTURE_BIT),
843 };
844 struct blorp_address dst = {
845 .buffer = dst_buffer->address.bo,
846 .offset = dst_buffer->address.offset + region->dstOffset,
847 .mocs = anv_mocs(device, dst_buffer->address.bo,
848 ISL_SURF_USAGE_RENDER_TARGET_BIT),
849 };
850
851 blorp_buffer_copy(batch, src, dst, region->size);
852 }
853
/* vkCmdCopyBuffer2KHR: each region is an independent blorp buffer copy. */
void anv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                           const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   /* The copies above wrote a buffer through the render pipe. */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}
874
875
/* vkCmdUpdateBuffer: stages the data through the dynamic state stream in
 * chunks and copies each chunk into the destination buffer with blorp.
 */
void anv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                         VkBuffer dstBuffer,
                         VkDeviceSize dstOffset,
                         VkDeviceSize dataSize,
                         const void *pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize > 0) {
      const uint32_t chunk_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, chunk_size, 64);

      memcpy(tmp_data.map, pData, chunk_size);

      const struct blorp_address src = {
         .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                          ISL_SURF_USAGE_TEXTURE_BIT, false)
      };
      const struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          ISL_SURF_USAGE_RENDER_TARGET_BIT),
      };

      blorp_buffer_copy(&batch, src, dst, chunk_size);

      dataSize -= chunk_size;
      dstOffset += chunk_size;
      pData = (const char *)pData + chunk_size;
   }

   anv_blorp_batch_finish(&batch);

   /* The copies above wrote a buffer through the render pipe. */
   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}
936
/* Vulkan entrypoint: fill a buffer range with a repeating 32-bit pattern
 * by rendering color clears over surfaces aliased onto the buffer memory.
 * The range is carved into up to three pieces: full max-size rectangles,
 * one full-width rectangle of leftover rows, and one final partial row.
 */
void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* Resolves VK_WHOLE_SIZE and clamps the range to the buffer's size. */
   fillSize = anv_buffer_get_range(dst_buffer, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   /* Replicate the pattern into all four channels; formats narrower than
    * RGBA32 simply consume a prefix of these dwords.
    */
   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

   /* Piece 1: as many full MAX_SURFACE_DIM x MAX_SURFACE_DIM surfaces as
    * fit in the range.
    */
   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, 0 /* color_write_disable */);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   /* Piece 2: a full-width rectangle covering the remaining whole rows. */
   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, 0 /* color_write_disable */);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   /* Piece 3: a single partial row for whatever is left. */
   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}
1025
/* Vulkan entrypoint: clear the given mip/layer ranges of a color image to
 * pColor using blorp render-target clears, one clear per mip level.
 */
void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      /* Resolve the VkFormat to the ISL format/swizzle used for the clear. */
      struct anv_format_plane src_format =
         anv_get_format_aspect(&cmd_buffer->device->info, image->vk.format,
                               VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->vk.extent.width, level);
         const unsigned level_height = anv_minify(image->vk.extent.height, level);

         /* For 3D images a "layer" is a depth slice, so the whole minified
          * depth of each level is cleared regardless of baseArrayLayer.
          */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->vk.extent.depth, level);
         }

         /* Keep the aux-usage tracking up to date for the written range. */
         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), 0 /* color_write_disable */);
      }
   }

   anv_blorp_batch_finish(&batch);
}
1087
/* Vulkan entrypoint: clear mip/layer ranges of a depth and/or stencil
 * image.  The depth and stencil blorp surfaces are set up once and reused
 * for every range; each range selects which aspects actually get cleared.
 */
void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   /* Depth/stencil clears are only implemented on the 3D pipeline. */
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth, stencil, stencil_shadow;
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   bool has_stencil_shadow = false;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);

      /* Some stencil configurations keep a linear "shadow" copy that must
       * be cleared alongside the real surface.
       */
      has_stencil_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                             VK_IMAGE_ASPECT_STENCIL_BIT,
                                             &stencil_shadow);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->vk.extent.width, level);
         const unsigned level_height = anv_minify(image->vk.extent.height, level);

         /* For 3D images, clear every depth slice of this level.
          * NOTE(review): unlike anv_CmdClearColorImage, base_layer is not
          * reset to 0 here — presumably fine because 3D depth/stencil
          * images are not a supported combination; verify if that changes.
          */
         if (image->vk.image_type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->vk.extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);

         /* Mirror the stencil clear into the shadow copy if there is one. */
         if (clear_stencil && has_stencil_shadow) {
            union isl_color_value stencil_color = {
               .u32 = { pDepthStencil->stencil, },
            };
            blorp_clear(&batch, &stencil_shadow,
                        ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                        level, base_layer, layer_count,
                        0, 0, level_width, level_height,
                        stencil_color, 0 /* color_write_disable */);
         }
      }
   }

   anv_blorp_batch_finish(&batch);
}
1171
1172 VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer * cmd_buffer,uint32_t num_entries,uint32_t * state_offset,struct anv_state * bt_state)1173 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
1174 uint32_t num_entries,
1175 uint32_t *state_offset,
1176 struct anv_state *bt_state)
1177 {
1178 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1179 state_offset);
1180 if (bt_state->map == NULL) {
1181 /* We ran out of space. Grab a new binding table block. */
1182 VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
1183 if (result != VK_SUCCESS)
1184 return result;
1185
1186 /* Re-emit state base addresses so we get the new surface state base
1187 * address before we start emitting binding tables etc.
1188 */
1189 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
1190
1191 *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
1192 state_offset);
1193 assert(bt_state->map != NULL);
1194 }
1195
1196 return VK_SUCCESS;
1197 }
1198
1199 static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer * cmd_buffer,struct anv_state surface_state,uint32_t * bt_offset)1200 binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
1201 struct anv_state surface_state,
1202 uint32_t *bt_offset)
1203 {
1204 uint32_t state_offset;
1205 struct anv_state bt_state;
1206
1207 VkResult result =
1208 anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
1209 &bt_state);
1210 if (result != VK_SUCCESS)
1211 return result;
1212
1213 uint32_t *bt_map = bt_state.map;
1214 bt_map[0] = surface_state.offset + state_offset;
1215
1216 *bt_offset = bt_state.offset;
1217 return VK_SUCCESS;
1218 }
1219
/* Clear rectangles of one color attachment of the current subpass using
 * blorp's attachment-clear path (a binding table pointing at the already
 * emitted attachment surface state).  No-op if the attachment is unused
 * or the binding table cannot be allocated.
 */
static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t color_att = attachment->colorAttachment;
   assert(color_att < subpass->color_count);
   const uint32_t att_idx = subpass->color_attachments[color_att].attachment;

   if (att_idx == VK_ATTACHMENT_UNUSED)
      return;

   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];
   struct anv_attachment_state *att_state =
      &cmd_buffer->state.attachments[att_idx];

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att_state->color.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      /* One clear per active view, per rect; the view index selects the
       * layer (start_layer = view_idx, layer_count = 1).
       */
      u_foreach_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      /* clear_color=true, clear_depth/stencil=false for a color clear. */
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}
1280
/* Clear rectangles of the current subpass's depth/stencil attachment via
 * blorp's attachment-clear path.  The binding table points at the null
 * surface state since no color output is written.
 */
static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   /* Dummy color value — color clearing is disabled in every call below. */
   static const union isl_color_value color_value = { .u32 = { 0, } };
   const struct anv_subpass *subpass = cmd_buffer->state.subpass;
   if (!subpass->depth_stencil_attachment)
      return;

   const uint32_t att_idx = subpass->depth_stencil_attachment->attachment;
   assert(att_idx != VK_ATTACHMENT_UNUSED);
   struct anv_render_pass_attachment *pass_att =
      &cmd_buffer->state.pass->attachments[att_idx];

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   /* blorp only needs a real depth format when depth is being cleared. */
   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (clear_depth) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        pass_att->format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      cmd_buffer->state.null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (subpass->view_mask) {
      /* One clear per active view, per rect, with the view index used as
       * the single layer to clear.
       */
      u_foreach_bit(view_idx, subpass->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format, pass_att->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format, pass_att->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}
1353
/* Vulkan entrypoint: clear regions of the currently bound subpass
 * attachments from inside a render pass, dispatching each attachment to
 * the color or depth/stencil helper.
 */
void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
   if (cmd_buffer->state.conditional_render_enabled) {
      anv_cmd_emit_conditional_render_predicate(cmd_buffer);
      flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, flags);

   for (uint32_t i = 0; i < attachmentCount; ++i) {
      const VkClearAttachment *att = &pAttachments[i];

      if (att->aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         assert(att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
         clear_color_attachment(cmd_buffer, &batch, att, rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch, att,
                                        rectCount, pRects);
      }
   }

   anv_blorp_batch_finish(&batch);
}
1389
/* Phase of a render pass during which an implicit blorp operation runs.
 * NOTE(review): not referenced anywhere in the visible portion of this
 * file — confirm it is still used before relying on it.
 */
enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};
1395
/* Resolve a multisampled 2D image into a single-sampled one with blorp,
 * one blit per layer.  Aux usages are given explicitly by the caller;
 * filter may be BLORP_FILTER_NONE to let the source type pick it.
 */
void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   /* MSAA resolves are only implemented on the 3D pipeline. */
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   /* Only 2D multisampled -> 2D single-sampled resolves are supported. */
   assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(src_image->vk.samples > 1);
   assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
   assert(dst_image->vk.samples == 1);
   assert(src_image->n_planes == dst_image->n_planes);

   struct blorp_surf src_surf, dst_surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                src_aux_usage, &src_surf);
   /* MCS-compressed sources need their clear color to decode samples. */
   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
      src_surf.clear_color_addr = anv_to_blorp_address(
         anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
                                        VK_IMAGE_ASPECT_COLOR_BIT));
   }
   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                dst_aux_usage, &dst_surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                     aspect, dst_aux_usage,
                                     dst_level, dst_base_layer, layer_count);

   if (filter == BLORP_FILTER_NONE) {
      /* If no explicit filter is provided, then it's implied by the type of
       * the source image: depth/stencil and integer formats take sample 0,
       * everything else averages the samples.
       */
      if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
          (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
          isl_format_has_int_channel(src_surf.surf->format)) {
         filter = BLORP_FILTER_SAMPLE_0;
      } else {
         filter = BLORP_FILTER_AVERAGE;
      }
   }

   for (uint32_t l = 0; l < layer_count; l++) {
      blorp_blit(&batch,
                 &src_surf, src_level, src_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 filter, false, false);
   }

   anv_blorp_batch_finish(&batch);
}
1465
/* Resolve one VkImageResolve2KHR region, iterating over every aspect in
 * the region's aspect mask and deriving per-aspect aux usages from the
 * image layouts before handing off to anv_image_msaa_resolve().
 */
static void
resolve_image(struct anv_cmd_buffer *cmd_buffer,
              struct anv_image *src_image,
              VkImageLayout src_image_layout,
              struct anv_image *dst_image,
              VkImageLayout dst_image_layout,
              const VkImageResolve2KHR *region)
{
   assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
   assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
          vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));

   const uint32_t layer_count =
      vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);

   anv_foreach_image_aspect_bit(aspect_bit, src_image,
                                region->srcSubresource.aspectMask) {
      /* Pick the aux (compression) usage implied by each image's layout. */
      enum isl_aux_usage src_aux_usage =
         anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                 src_image_layout);
      enum isl_aux_usage dst_aux_usage =
         anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                 dst_image_layout);

      /* BLORP_FILTER_NONE lets the resolve pick sample-0 vs. average
       * based on the source format.
       */
      anv_image_msaa_resolve(cmd_buffer,
                             src_image, src_aux_usage,
                             region->srcSubresource.mipLevel,
                             region->srcSubresource.baseArrayLayer,
                             dst_image, dst_aux_usage,
                             region->dstSubresource.mipLevel,
                             region->dstSubresource.baseArrayLayer,
                             (1 << aspect_bit),
                             region->srcOffset.x,
                             region->srcOffset.y,
                             region->dstOffset.x,
                             region->dstOffset.y,
                             region->extent.width,
                             region->extent.height,
                             layer_count, BLORP_FILTER_NONE);
   }
}
1511
/* Vulkan entrypoint: resolve each requested region of a multisampled
 * image into a single-sampled image.
 */
void anv_CmdResolveImage2KHR(
    VkCommandBuffer                             commandBuffer,
    const VkResolveImageInfo2KHR*               pResolveImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);

   const uint32_t region_count = pResolveImageInfo->regionCount;
   for (uint32_t i = 0; i < region_count; i++) {
      resolve_image(cmd_buffer,
                    src_image, pResolveImageInfo->srcImageLayout,
                    dst_image, pResolveImageInfo->dstImageLayout,
                    &pResolveImageInfo->pRegions[i]);
   }
}
1527
/* Copy a range of mips/layers from an image's main surface into its
 * shadow surface, flushing caches before (to see prior writes) and after
 * (so later consumers see the copy).
 */
void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We don't know who touched the main surface last so flush a bunch of
    * caches to ensure we get good data.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before copy_to_shadow");

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                VK_IMAGE_LAYOUT_GENERAL,
                                ISL_AUX_USAGE_NONE, &surf);
   /* The shadow copy path assumes an uncompressed source. */
   assert(surf.aux_usage == ISL_AUX_USAGE_NONE);

   struct blorp_surf shadow_surf;
   get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                       image, aspect, &shadow_surf);

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = vk_image_mip_level_extent(&image->vk, level);

      /* For 3D images, copy every depth slice of this level. */
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   /* We just wrote to the buffer with the render cache. Flush it. */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
                             "after copy_to_shadow");

   anv_blorp_batch_finish(&batch);
}
1584
1585 void
anv_image_clear_color(struct anv_cmd_buffer * cmd_buffer,const struct anv_image * image,VkImageAspectFlagBits aspect,enum isl_aux_usage aux_usage,enum isl_format format,struct isl_swizzle swizzle,uint32_t level,uint32_t base_layer,uint32_t layer_count,VkRect2D area,union isl_color_value clear_color)1586 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
1587 const struct anv_image *image,
1588 VkImageAspectFlagBits aspect,
1589 enum isl_aux_usage aux_usage,
1590 enum isl_format format, struct isl_swizzle swizzle,
1591 uint32_t level, uint32_t base_layer, uint32_t layer_count,
1592 VkRect2D area, union isl_color_value clear_color)
1593 {
1594 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1595
1596 /* We don't support planar images with multisampling yet */
1597 assert(image->n_planes == 1);
1598
1599 struct blorp_batch batch;
1600 anv_blorp_batch_init(cmd_buffer, &batch, 0);
1601
1602 struct blorp_surf surf;
1603 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1604 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
1605 ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1606 aux_usage, &surf);
1607 anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
1608 level, base_layer, layer_count);
1609
1610 blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
1611 level, base_layer, layer_count,
1612 area.offset.x, area.offset.y,
1613 area.offset.x + area.extent.width,
1614 area.offset.y + area.extent.height,
1615 clear_color, 0 /* color_write_disable */);
1616
1617 anv_blorp_batch_finish(&batch);
1618 }
1619
/* Clear a rectangle of a depth and/or stencil image with blorp, including
 * any linear stencil shadow copy, with the pipe-control flushes blorp's
 * RGBA32_UINT stencil-clear optimization requires around the clear.
 */
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   /* Depth/stencil clears are only implemented on the 3D pipeline. */
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   /* A zeroed blorp_surf stands in for "no surface" when an aspect is
    * not being cleared.
    */
   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   depth_aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the depth
    * cache before rendering to it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before clear DS");

   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             depth_value,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             stencil_value);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the render
    * cache before someone starts trying to do stencil on it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after clear DS");

   /* Mirror the stencil clear into the linear shadow copy, if any. */
   struct blorp_surf stencil_shadow;
   if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
       get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                           &stencil_shadow)) {
      union isl_color_value stencil_color = {
         .u32 = { stencil_value },
      };
      blorp_clear(&batch, &stencil_shadow,
                  ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                  level, base_layer, layer_count,
                  area.offset.x, area.offset.y,
                  area.offset.x + area.extent.width,
                  area.offset.y + area.extent.height,
                  stencil_color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);
}
1702
/* Emit a HiZ auxiliary-surface operation (e.g. resolve or ambiguate) on a
 * layer range of one mip level of a depth image.
 */
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   /* HiZ only exists for the depth aspect, which lives on plane 0. */
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(plane == 0);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   /* HiZ ops are only implemented on the 3D pipeline. */
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage, &surf);

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   anv_blorp_batch_finish(&batch);
}
1729
void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value)
{
   /* Fast-clears the requested depth and/or stencil aspects of one miplevel
    * using the WM_HZ_OP-based BLORP path.  Depth is cleared to ANV_HZ_FC_VAL
    * (the fixed HiZ fast-clear value); stencil is cleared to stencil_value.
    * The PIPE_CONTROLs issued before and after the clear implement hardware
    * workarounds and must stay ordered exactly as written.
    */
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   /* Zero-initialized so BLORP sees "no surface" when an aspect is absent. */
   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
      assert(base_layer + layer_count <=
             anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
    *
    *    "The following is required when performing a depth buffer clear with
    *    using the WM_STATE or 3DSTATE_WM:
    *
    *       * If other rendering operations have preceded this clear, a
    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *         enabled must be issued before the rectangle primitive used for
    *         the depth buffer clear operation.
    *       * [...]"
    *
    * Even though the PRM only says that this is required if using 3DSTATE_WM
    * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
    * hangs when doing a clear with WM_HZ_OP.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "before clear hiz");

   if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
      /* From Bspec 47010 (Depth Buffer Clear):
       *
       *    Since the fast clear cycles to CCS are not cached in TileCache,
       *    any previous depth buffer writes to overlapping pixels must be
       *    flushed out of TileCache before a succeeding Depth Buffer Clear.
       *    This restriction only applies to Depth Buffer with write-thru
       *    enabled, since fast clears to CCS only occur for write-thru mode.
       *
       * There may have been a write to this depth buffer. Flush it from the
       * tile cache just in case.
       */
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT,
                                "before clear hiz_ccs_wt");
   }

   /* Clear both aspects in one WM_HZ_OP pass; the two bools select which of
    * depth/stencil actually get written.
    */
   blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                 level, base_layer, layer_count,
                                 area.offset.x, area.offset.y,
                                 area.offset.x + area.extent.width,
                                 area.offset.y + area.extent.height,
                                 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                                 ANV_HZ_FC_VAL,
                                 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 stencil_value);

   anv_blorp_batch_finish(&batch);

   /* From the SKL PRM, Depth Buffer Clear:
    *
    *    "Depth Buffer Clear Workaround
    *
    *    Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
    *    starting to render.  DepthStall and DepthFlush are not needed between
    *    consecutive depth clear passes nor is it required if the depth-clear
    *    pass was done with “full_surf_clear” bit set in the
    *    3DSTATE_WM_HZ_OP."
    *
    * Even though the PRM provides a bunch of conditions under which this is
    * supposedly unnecessary, we choose to perform the flush unconditionally
    * just to be safe.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "after clear hiz");
}
1839
1840 void
anv_image_mcs_op(struct anv_cmd_buffer * cmd_buffer,const struct anv_image * image,enum isl_format format,struct isl_swizzle swizzle,VkImageAspectFlagBits aspect,uint32_t base_layer,uint32_t layer_count,enum isl_aux_op mcs_op,union isl_color_value * clear_value,bool predicate)1841 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
1842 const struct anv_image *image,
1843 enum isl_format format, struct isl_swizzle swizzle,
1844 VkImageAspectFlagBits aspect,
1845 uint32_t base_layer, uint32_t layer_count,
1846 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
1847 bool predicate)
1848 {
1849 assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
1850 assert(image->vk.samples > 1);
1851 assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
1852
1853 /* Multisampling with multi-planar formats is not supported */
1854 assert(image->n_planes == 1);
1855
1856 struct blorp_batch batch;
1857 anv_blorp_batch_init(cmd_buffer, &batch,
1858 BLORP_BATCH_PREDICATE_ENABLE * predicate +
1859 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
1860 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1861
1862 struct blorp_surf surf;
1863 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1864 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1865 ISL_AUX_USAGE_MCS, &surf);
1866
1867 /* Blorp will store the clear color for us if we provide the clear color
1868 * address and we are doing a fast clear. So we save the clear value into
1869 * the blorp surface.
1870 */
1871 if (clear_value)
1872 surf.clear_color = *clear_value;
1873
1874 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1875 *
1876 * "After Render target fast clear, pipe-control with color cache
1877 * write-flush must be issued before sending any DRAW commands on
1878 * that render target."
1879 *
1880 * This comment is a bit cryptic and doesn't really tell you what's going
1881 * or what's really needed. It appears that fast clear ops are not
1882 * properly synchronized with other drawing. This means that we cannot
1883 * have a fast clear operation in the pipe at the same time as other
1884 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1885 * that the contents of the previous draw hit the render target before we
1886 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1887 * that it is completed before any additional drawing occurs.
1888 */
1889 anv_add_pending_pipe_bits(cmd_buffer,
1890 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1891 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1892 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1893 "before fast clear mcs");
1894
1895 switch (mcs_op) {
1896 case ISL_AUX_OP_FAST_CLEAR:
1897 blorp_fast_clear(&batch, &surf, format, swizzle,
1898 0, base_layer, layer_count,
1899 0, 0, image->vk.extent.width, image->vk.extent.height);
1900 break;
1901 case ISL_AUX_OP_PARTIAL_RESOLVE:
1902 blorp_mcs_partial_resolve(&batch, &surf, format,
1903 base_layer, layer_count);
1904 break;
1905 case ISL_AUX_OP_FULL_RESOLVE:
1906 case ISL_AUX_OP_AMBIGUATE:
1907 default:
1908 unreachable("Unsupported MCS operation");
1909 }
1910
1911 anv_add_pending_pipe_bits(cmd_buffer,
1912 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1913 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1914 "after fast clear mcs");
1915
1916 anv_blorp_batch_finish(&batch);
1917 }
1918
1919 void
anv_image_ccs_op(struct anv_cmd_buffer * cmd_buffer,const struct anv_image * image,enum isl_format format,struct isl_swizzle swizzle,VkImageAspectFlagBits aspect,uint32_t level,uint32_t base_layer,uint32_t layer_count,enum isl_aux_op ccs_op,union isl_color_value * clear_value,bool predicate)1920 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
1921 const struct anv_image *image,
1922 enum isl_format format, struct isl_swizzle swizzle,
1923 VkImageAspectFlagBits aspect, uint32_t level,
1924 uint32_t base_layer, uint32_t layer_count,
1925 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
1926 bool predicate)
1927 {
1928 assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
1929 assert(image->vk.samples == 1);
1930 assert(level < anv_image_aux_levels(image, aspect));
1931 /* Multi-LOD YcBcR is not allowed */
1932 assert(image->n_planes == 1 || level == 0);
1933 assert(base_layer + layer_count <=
1934 anv_image_aux_layers(image, aspect, level));
1935
1936 const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
1937
1938 struct blorp_batch batch;
1939 anv_blorp_batch_init(cmd_buffer, &batch,
1940 BLORP_BATCH_PREDICATE_ENABLE * predicate +
1941 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
1942 assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1943
1944 struct blorp_surf surf;
1945 get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
1946 0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1947 image->planes[plane].aux_usage,
1948 &surf);
1949
1950 uint32_t level_width = anv_minify(surf.surf->logical_level0_px.w, level);
1951 uint32_t level_height = anv_minify(surf.surf->logical_level0_px.h, level);
1952
1953 /* Blorp will store the clear color for us if we provide the clear color
1954 * address and we are doing a fast clear. So we save the clear value into
1955 * the blorp surface.
1956 */
1957 if (clear_value)
1958 surf.clear_color = *clear_value;
1959
1960 /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
1961 *
1962 * "After Render target fast clear, pipe-control with color cache
1963 * write-flush must be issued before sending any DRAW commands on
1964 * that render target."
1965 *
1966 * This comment is a bit cryptic and doesn't really tell you what's going
1967 * or what's really needed. It appears that fast clear ops are not
1968 * properly synchronized with other drawing. This means that we cannot
1969 * have a fast clear operation in the pipe at the same time as other
1970 * regular drawing operations. We need to use a PIPE_CONTROL to ensure
1971 * that the contents of the previous draw hit the render target before we
1972 * resolve and then use a second PIPE_CONTROL after the resolve to ensure
1973 * that it is completed before any additional drawing occurs.
1974 */
1975 anv_add_pending_pipe_bits(cmd_buffer,
1976 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
1977 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
1978 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1979 "before fast clear ccs");
1980
1981 switch (ccs_op) {
1982 case ISL_AUX_OP_FAST_CLEAR:
1983 blorp_fast_clear(&batch, &surf, format, swizzle,
1984 level, base_layer, layer_count,
1985 0, 0, level_width, level_height);
1986 break;
1987 case ISL_AUX_OP_FULL_RESOLVE:
1988 case ISL_AUX_OP_PARTIAL_RESOLVE:
1989 blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
1990 format, ccs_op);
1991 break;
1992 case ISL_AUX_OP_AMBIGUATE:
1993 for (uint32_t a = 0; a < layer_count; a++) {
1994 const uint32_t layer = base_layer + a;
1995 blorp_ccs_ambiguate(&batch, &surf, level, layer);
1996 }
1997 break;
1998 default:
1999 unreachable("Unsupported CCS operation");
2000 }
2001
2002 anv_add_pending_pipe_bits(cmd_buffer,
2003 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
2004 ANV_PIPE_END_OF_PIPE_SYNC_BIT,
2005 "after fast clear ccs");
2006
2007 anv_blorp_batch_finish(&batch);
2008 }
2009