/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_drm_fourcc.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"

#include "gfx10_format_table.h"

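/* Usage bits via which the GPU may write to an image. */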
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;

static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
       device->physical_device->rad_info.chip_class <= GFX8) {
      /* This is restricted to GFX8 and earlier because using linear
       * here causes hangs in some VK CTS tests on GFX9. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   return RADEON_SURF_MODE_2D;
}

static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.chip_class < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. However,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z plane compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
          format != VK_FORMAT_D16_UNORM)
         return false;
   }

   return true;
}

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   if (info->bo_metadata) {
      if (device->physical_device->rad_info.chip_class >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
                                          const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return !!(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
}

static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
                                    const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   return radv_image_use_fast_clear_for_image_early(device, image) &&
          (image->exclusive ||
           /* Enable DCC for concurrent images if stores are
            * supported because that means we can keep DCC compressed on
            * all layouts/queues.
            */
           radv_image_use_dcc_image_stores(device, image));
}

bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
{
   bool blendable;

   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
      return false;

   if (sign_reinterpret != NULL)
      *sign_reinterpret = false;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i],
                                             sign_reinterpret))
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}

static bool
radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
{
   if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
      return false;

   return radv_is_atomic_format_supported(format);
}

static bool
radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
                               VkImageCreateFlags flags)
{
   if (radv_format_is_atomic_allowed(device, format))
      return true;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
               return true;
         }
      }
   }

   return false;
}

static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
                             const VkImageCreateInfo *pCreateInfo, VkFormat format,
                             bool *sign_reinterpret)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (device->physical_device->rad_info.chip_class < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* Do not enable DCC for fragment shading rate attachments. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.chip_class < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.chip_class == GFX9)
         return false;
   }

   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
                                          pCreateInfo->flags, sign_reinterpret);
}

static bool
radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
{
   if (!radv_image_has_dcc(image))
      return false;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return true;

   if (!radv_image_use_fast_clear_for_image(device, image))
      return false;

   /* TODO: Fix storage images with DCC without DCC image stores.
    * Disabling it for now. */
   if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   return true;
}

/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class,
                                               &image->planes[0].surface);
}

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
}

static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
   return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
                                      (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}

static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
   /* TODO:
    * - Investigate mips+layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips =
      image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;

   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
   if (device->physical_device->rad_info.chip_class == GFX10 &&
       image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
      return false;

   /* Do not enable HTILE for very small images because it seems less
    * performant, but make sure it's still allowed with VRS attachments
    * because those require HTILE.
    */
   if (image->info.width * image->info.height < 8 * 8 &&
       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !device->attachment_vrs_enabled)
      return false;

   if (device->instance->disable_htile_layers && image->info.array_size > 1)
      return false;

   return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
   /* TC-compat CMASK is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;

   if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
      return false;

   /* Do not enable TC-compatible CMASK if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                         VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* If the image doesn't have FMASK, it can't be fetchable. */
   if (!radv_image_has_fmask(image))
      return false;

   return true;
}

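/* Word 1 of the opaque metadata identifies the GPU: the ATI/AMD vendor ID in
 * the high 16 bits and the PCI device ID in the low 16 bits.
 */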
static uint32_t
si_get_bo_metadata_word1(const struct radv_device *device)
{
   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
      return false;

   if (md->size_metadata < 40)
      return false;

   return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images, which results in weird
    * strides and other properties. Let's be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      if (device->physical_device->rad_info.chip_class >= GFX10) {
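         /* On GFX10+ the descriptor stores width minus 1 split across two
          * dwords: the low bits in WIDTH_LO and the high bits in WIDTH_HI
          * (hence the shift by 2 when recombining below).
          */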
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                          create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image->info.surf_index = NULL;
      }
   }
   return VK_SUCCESS;
}

static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = vk_format_get_plane_format(image_format, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (radv_use_htile_for_image(device, image) &&
          !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   if (device->physical_device->rad_info.chip_class >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
                                     &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |=
         RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   return flags;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static unsigned
radv_map_swizzle(unsigned swizzle)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_Y:
      return V_008F0C_SQ_SEL_Y;
   case PIPE_SWIZZLE_Z:
      return V_008F0C_SQ_SEL_Z;
   case PIPE_SWIZZLE_W:
      return V_008F0C_SQ_SEL_W;
   case PIPE_SWIZZLE_0:
      return V_008F0C_SQ_SEL_0;
   case PIPE_SWIZZLE_1:
      return V_008F0C_SQ_SEL_1;
   default: /* PIPE_SWIZZLE_X */
      return V_008F0C_SQ_SEL_X;
   }
}

static void
radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
                     enum pipe_swizzle swizzle[4])
{
   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
      /* 64-bit formats only support storage images and storage images
       * require identity component mappings. We use 32-bit
       * instructions to access 64-bit images, so we need a special
       * case here.
       *
       * The zw components are 1,0 so that they can easily be used
       * by loads to create the w component, which has to be 0 for
       * NULL descriptors.
       */
      swizzle[0] = PIPE_SWIZZLE_X;
      swizzle[1] = PIPE_SWIZZLE_Y;
      swizzle[2] = PIPE_SWIZZLE_1;
      swizzle[3] = PIPE_SWIZZLE_0;
   } else if (!mapping) {
      for (unsigned i = 0; i < 4; i++)
         swizzle[i] = desc->swizzle[i];
   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
                                             PIPE_SWIZZLE_1};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }
}

static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   stride = desc->block.bits / 8;

   radv_compose_swizzle(desc, NULL, swizzle);

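   /* A buffer descriptor is 4 dwords: dword 0 holds the low 32 bits of the
    * base address, dword 1 the high address bits and the element stride,
    * dword 2 the NUM_RECORDS bound, and dword 3 the destination swizzle
    * plus format information.
    */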
   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

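   /* The driver keeps NUM_RECORDS in bytes on GFX8, and in stride-sized
    * records on the other generations, hence the divide below.
    */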
   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}

static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;
   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      va += (uint64_t)base_level_info->offset_256B * 256;

   state[0] = va >> 8;
   if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (chip_class <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

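         /* Fold the base tile swizzle (in 256-byte units, hence the shift)
          * into the DCC address, masked so it stays within the metadata
          * alignment.
          */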
         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}

static unsigned
radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
             unsigned nr_samples, bool is_storage_image, bool gfx9)
{
   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

   /* GFX9 allocates 1D textures as 2D. */
   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
      image_type = VK_IMAGE_TYPE_2D;
   switch (image_type) {
   case VK_IMAGE_TYPE_1D:
      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
   case VK_IMAGE_TYPE_2D:
      if (nr_samples > 1)
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
      else
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
   case VK_IMAGE_TYPE_3D:
      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
         return V_008F1C_SQ_RSRC_IMG_3D;
      else
         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   default:
      unreachable("illegal image type");
   }
}

static unsigned
gfx9_border_color_swizzle(const struct util_format_description *desc)
{
   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

   if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
      /* For the pre-defined border color values (white, opaque
       * black, transparent black), the only thing that matters is
       * that the alpha channel winds up in the correct place
       * (because the RGB channels are all the same) so either of
       * these enumerations will work.
       */
      if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
      if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   }

   return bc_swizzle;
}

bool
vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == PIPE_SWIZZLE_X;

   return radv_translate_colorswap(format, false) <= 1;
}

/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                              bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                              const VkComponentMapping *mapping, unsigned first_level,
                              unsigned last_level, unsigned first_layer, unsigned last_layer,
                              unsigned width, unsigned height, unsigned depth, uint32_t *state,
                              uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   unsigned img_format;
   unsigned type;

   desc = vk_format_description(vk_format);
   img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

   radv_compose_swizzle(desc, mapping, swizzle);

   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(1);
   state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
              S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
              S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                          : last_level) |
              S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
   /* Depth is the last accessible layer on GFX9+. The hw doesn't need
    * to know the total number of layers.
    */
   state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
              S_00A010_BASE_ARRAY(first_layer);
   state[5] = S_00A014_ARRAY_PITCH(0) |
              S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                       : image->info.levels - 1) |
              S_00A014_PERF_MOD(4);
   state[6] = 0;
   state[7] = 0;

   if (radv_dcc_enabled(image, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
                     image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   }

   if (radv_image_get_iterate256(device, image)) {
      state[6] |= S_00A018_ITERATE_256(1);
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint32_t format;
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         switch (image->info.samples) {
         case 2:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
            break;
         case 4:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
            break;
         case 8:
            format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
            break;
         default:
            unreachable("invalid nr_samples");
         }

         fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
                          S_00A004_WIDTH_LO(width - 1);
         fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
                          S_00A008_RESOURCE_LEVEL(1);
         fmask_state[3] =
            S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
            S_00A00C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
         fmask_state[5] = 0;
         fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
         fmask_state[7] = 0;

         if (radv_image_is_tc_compat_cmask(image)) {
            va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

            fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
            fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
            fmask_state[7] |= va >> 16;
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9).
 */
static void
si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                           bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                           const VkComponentMapping *mapping, unsigned first_level,
                           unsigned last_level, unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth, uint32_t *state,
                           uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type;

   desc = vk_format_description(vk_format);

   radv_compose_swizzle(desc, mapping, swizzle);

   first_non_void = vk_format_get_first_non_void_channel(vk_format);

   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
   if (num_format == ~0) {
      num_format = 0;
   }

   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with either Z16 or Z32 HTILE needs a special format. */
   if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
       radv_image_is_tc_compat_htile(image)) {
      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
   }
   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
               S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (device->physical_device->rad_info.chip_class == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);

      /* Depth is the last accessible layer on Gfx9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : image->info.levels - 1);
   } else {
      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }
   if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
       image->planes[0].surface.meta_offset) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   } else {
      /* The last dword is unused by hw. The shader uses it to clear
       * bits in the first dword of sampler state.
       */
      if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
         if (first_level == last_level)
            state[7] = C_008F30_MAX_ANISO_RATIO;
         else
            state[7] = 0xffffffff;
      }
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint32_t fmask_format;
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
            switch (image->info.samples) {
            case 2:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
               break;
            case 4:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
               break;
            case 8:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
               break;
            default:
               unreachable("invalid nr_samples");
            }
         } else {
            switch (image->info.samples) {
            case 2:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
               break;
            case 4:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
               break;
            case 8:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
               break;
            default:
               assert(0);
               fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
            }
            num_format = V_008F14_IMG_NUM_FORMAT_UINT;
         }

         fmask_state[0] = va >> 8;
         fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
                          S_008F14_NUM_FORMAT(num_format);
         fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
         fmask_state[3] =
            S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_008F1C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = 0;
         fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
         fmask_state[6] = 0;
         fmask_state[7] = 0;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
            fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                              S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
            fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         } else {
            fmask_state[3] |=
               S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
            fmask_state[4] |=
               S_008F20_DEPTH(depth - 1) |
               S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
            fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

static void
radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                             bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                             const VkComponentMapping *mapping, unsigned first_level,
                             unsigned last_level, unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth, uint32_t *state,
                             uint32_t *fmask_state)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                    first_level, last_level, first_layer, last_layer, width, height,
                                    depth, state, fmask_state);
   } else {
      si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                 first_level, last_level, first_layer, last_layer, width, height,
                                 depth, state, fmask_state);
   }
}

static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   assert(image->plane_count == 1);

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
                                image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, desc, NULL);

   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}

void
radv_init_metadata(struct radv_device *device, struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      uint64_t dcc_offset =
         image->offset +
         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size =
         surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}

static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
                                     const struct radv_image *image, struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
       image->info.depth > 1 || radv_image_has_dcc(image) ||
       !radv_image_use_fast_clear_for_image(device, image) ||
       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->info.storage_samples == 1);

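   /* Append the CMASK metadata at the end of the surface, aligned to its
    * required alignment, and grow the total size and alignment to match.
    */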
   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}

static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* Images with modifiers can potentially be imported, so do not
    * allocate driver-internal metadata values for them. */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

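   /* Each of the values below is allocated per mip level; the predicates
    * and clear values take 8 bytes (64 bits) per level.
    */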
   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
       radv_image_has_cmask(image) || radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug, which
       * has to be worked around by updating ZRANGE_PRECISION when
       * doing fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}

/* Determine if the image is affected by the pipe misaligned metadata issue,
 * which requires invalidating L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
1385
1386 if (vk_format_has_depth(image->vk_format)) {
1387 if (radv_image_is_tc_compat_htile(image) && overlap) {
1388 return true;
1389 }
1390 } else {
1391 int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
1392 int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
1393 int samples_overlap = MIN2(log2_samples, overlap);
1394
1395 /* TODO: It shouldn't be necessary if the image has DCC but
1396 * not readable by shader.
1397 */
1398 if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
1399 (samples_overlap > log2_samples_frag_diff)) {
1400 return true;
1401 }
1402 }
1403 }
1404
1405 return false;
1406 }
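
/* Worked example with hypothetical register values: for a 4xMSAA 32bpp color
 * image on a pre-GFX10_3 part with NUM_PIPES = 5 in GB_ADDR_CONFIG,
 * log2_bpp_and_samples = MIN2(6, 2 + 2) = 4 and overlap = MAX2(0, 4 + 5 - 8)
 * = 1. With MAX_COMPRESSED_FRAGS = 1, log2_samples_frag_diff = MAX2(0, 2 - 1)
 * = 1 and samples_overlap = MIN2(2, 1) = 1, so 1 > 1 is false and the plane
 * is not considered pipe-misaligned.
 */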

static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      return !device->physical_device->rad_info.tcc_rb_non_coherent &&
             !radv_image_is_pipe_misaligned(device, image);
   } else if (device->physical_device->rad_info.chip_class == GFX9) {
      if (image->info.samples == 1 &&
          (image->usage &
           (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
          !vk_format_has_stencil(image->vk_format)) {
         /* Single-sample color and single-sample depth
          * (not stencil) are coherent with shaders on
          * GFX9.
          */
         return true;
      }
   }

   return false;
}

/**
 * Determine if the given image can be fast cleared.
 */
static bool
radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
      return false;

   if (vk_format_is_color(image->vk_format)) {
      if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
         return false;

      /* RB+ doesn't work with CMASK fast clear on Stoney. */
      if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
         return false;
   } else {
      if (!radv_image_has_htile(image))
         return false;
   }

   /* Do not fast clear 3D images. */
   if (image->type == VK_IMAGE_TYPE_3D)
      return false;

   return true;
}

/**
 * Determine if the given image can be fast cleared using comp-to-single.
 */
static bool
radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
{
   /* comp-to-single is only available for GFX10+. */
   if (device->physical_device->rad_info.chip_class < GFX10)
      return false;

   /* If the image can't be fast cleared, comp-to-single can't be used. */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single. */
   if (!radv_image_has_dcc(image))
      return false;

   /* It seems 8bpp and 16bpp require RB+ to work. */
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format);
   if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
      return false;

   return true;
}

static void
radv_image_reset_layout(struct radv_image *image)
{
   image->size = 0;
   image->alignment = 1;

   image->tc_compatible_cmask = 0;
   image->fce_pred_offset = image->dcc_pred_offset = 0;
   image->clear_value_offset = image->tc_compat_zrange_offset = 0;

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(image->vk_format, i);
      if (vk_format_has_depth(format))
         format = vk_format_depth_only(format);

      uint64_t flags = image->planes[i].surface.flags;
      uint64_t modifier = image->planes[i].surface.modifier;
      memset(image->planes + i, 0, sizeof(image->planes[i]));

      image->planes[i].surface.flags = flags;
      image->planes[i].surface.modifier = modifier;
      image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
      image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
      image->planes[i].surface.bpe = vk_format_get_blocksize(format);

      /* Align the bytes per element to a dword: 3-byte (24-bit) formats are
       * stored with 4 bytes per element. */
      if (image->planes[i].surface.bpe == 3) {
         image->planes[i].surface.bpe = 4;
      }
   }
}

VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so that any accidental use of it below is
    * caught even in the common (non-delayed) path: in the delayed Android
    * path it is not available, so the layout code must rely only on the
    * internal info. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(image);

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);

      if (create_info.no_metadata_planes || image->plane_count > 1) {
         image->planes[plane].surface.flags |=
            RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
                                       &image->planes[plane].surface, image_info.storage_samples,
                                       image_info.levels, create_info.bo_metadata->size_metadata,
                                       create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
          !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                             &image->planes[plane].surface, image->info.levels,
                                             offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (image->plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                            &image->planes[plane].surface, i,
                                            0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
   }

   image->tc_compatible_cmask =
      radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
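
/* A sketch of the plane-packing loop above for a hypothetical two-plane
 * NV12-style image: if plane 0 ends at image->size = 1048576 with
 * alignment_log2 = 16, plane 1 starts at offset = align64(1048576, 65536) =
 * 1048576, and image->size becomes MAX2(1048576, 1048576 +
 * plane1_total_size); image->alignment is the maximum of the per-plane
 * alignments. With an explicit modifier, the offsets/strides come from
 * pPlaneLayouts instead and are only validated here.
 */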

static void
radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_image *image)
{
   if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
      device->ws->buffer_destroy(device->ws, image->bo);

   if (image->owned_memory != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   vk_object_base_finish(&image->base);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", "
           "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
           image->size, image->alignment, image->info.width, image->info.height, image->offset,
           image->info.array_size);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, 0);

      fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}

static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format,
                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
   const struct radv_physical_device *pdev = dev->physical_device;
   unsigned mod_count;

   assert(mod_list->drmFormatModifierCount);

   /* We can allow everything here as it does not affect order and the
    * application is only allowed to specify modifiers that we support. */
   const struct ac_modifier_options modifier_options = {
      .dcc = true,
      .dcc_retile = true,
   };

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, NULL);

   uint64_t *mods = calloc(mod_count, sizeof(*mods));

   /* If allocation fails, fall back to a simpler solution. */
   if (!mods)
      return mod_list->pDrmFormatModifiers[0];

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, mods);

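   /* Iterate the driver-ordered list in the outer loop so that the first
    * match is the modifier the driver prefers most among the ones the
    * application offered, rather than whichever one the application happened
    * to list first. */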
   for (unsigned i = 0; i < mod_count; ++i) {
      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
            free(mods);
            return mod_list->pDrmFormatModifiers[j];
         }
      }
   }
   unreachable("App specified an invalid modifier");
}

VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   const unsigned plane_count = vk_format_get_plane_count(format);
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image =
      vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   image->plane_count = plane_count;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
       !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
       pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      image->planes[plane].surface.flags =
         radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   bool delay_layout =
      external_info && (external_info->handleTypes &
                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      result =
         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}

static void
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
                                VkFormat vk_format, const VkComponentMapping *components,
                                bool is_storage_image, bool disable_compression,
                                bool enable_compression, unsigned plane_id,
                                unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
           vk_format_get_blockwidth(vk_format);

   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(
      device, image, is_storage_image, iview->type, vk_format, components, hw_level,
      hw_level + iview->level_count - 1, iview->base_layer,
      iview->base_layer + iview->layer_count - 1,
      vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
      vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
      iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
      descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);

   const struct legacy_surf_level *base_level_info = NULL;
   if (device->physical_device->rad_info.chip_class <= GFX9) {
      if (is_stencil)
         base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
      else
         base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
   }

   bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
   if (is_storage_image && !(enable_write_compression || enable_compression))
      disable_compression = true;
   si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
                                  iview->base_mip, blk_w, is_stencil, is_storage_image,
                                  disable_compression, enable_write_compression,
                                  descriptor->plane_descriptors[descriptor_plane_id]);
}

static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
      return 2;
   case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
      return 3;
   default:
      return 0;
   }
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return image->planes[0].format;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return image->planes[1].format;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return image->planes[2].format;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_stencil_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return vk_format_depth_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_depth_only(image->vk_format);
   default:
      return image->vk_format;
   }
}

/**
 * Determine if the given image view can be fast cleared.
 */
static bool
radv_image_view_can_fast_clear(const struct radv_device *device,
                               const struct radv_image_view *iview)
{
   struct radv_image *image;

   if (!iview)
      return false;
   image = iview->image;

   /* Only fast clear if the image itself can be fast cleared. */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* Only fast clear if all layers are bound. */
   if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
      return false;

   /* Only fast clear if the view covers the whole image. */
   if (!radv_image_extent_compare(image, &iview->extent))
      return false;

   return true;
}

void
radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const struct radv_image_view_extra_create_info *extra_create_info)
{
   RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
   uint32_t plane_count = 1;

   vk_object_base_init(&device->vk, &iview->base, VK_OBJECT_TYPE_IMAGE_VIEW);

   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             image->info.array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             radv_minify(image->info.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
   iview->image = image;
   iview->type = pCreateInfo->viewType;
   iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->base_layer = range->baseArrayLayer;
   iview->layer_count = radv_get_layerCount(image, range);
   iview->base_mip = range->baseMipLevel;
   iview->level_count = radv_get_levelCount(image, range);

   iview->vk_format = pCreateInfo->format;

   /* If the image has an Android external format, pCreateInfo->format will be
    * VK_FORMAT_UNDEFINED. */
   if (iview->vk_format == VK_FORMAT_UNDEFINED)
      iview->vk_format = image->vk_format;

   /* Split out the right aspect. Note that for internal meta code we sometimes
    * use an equivalent color format for the aspect, so we first have to check
    * whether we actually got a depth/stencil format. */
   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      if (vk_format_has_stencil(iview->vk_format))
         iview->vk_format = vk_format_stencil_only(iview->vk_format);
   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
      if (vk_format_has_depth(iview->vk_format))
         iview->vk_format = vk_format_depth_only(iview->vk_format);
   }

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      iview->extent = (VkExtent3D){
         .width = image->info.width,
         .height = image->info.height,
         .depth = image->info.depth,
      };
   } else {
      iview->extent = (VkExtent3D){
         .width = radv_minify(image->info.width, range->baseMipLevel),
         .height = radv_minify(image->info.height, range->baseMipLevel),
         .depth = radv_minify(image->info.depth, range->baseMipLevel),
      };
   }

   if (iview->vk_format != image->planes[iview->plane_id].format) {
      unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
      unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
      unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
      unsigned img_bh = vk_format_get_blockheight(image->vk_format);

      iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
      iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

      /* Comment ported from amdvlk -
       * If we have the following image:
       *           Uncompressed pixels   Compressed block sizes (4x4)
       *   mip0:       22 x 22                  6 x 6
       *   mip1:       11 x 11                  3 x 3
       *   mip2:        5 x  5                  2 x 2
       *   mip3:        2 x  2                  1 x 1
       *   mip4:        1 x  1                  1 x 1
       *
       * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
       * the HW is calculating the degradation of the block sizes down the mip-chain as follows
       * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
       *
       * This means that mip2 will be missing texels.
       *
       * Fix this by calculating the base mip's width and height, then convert
       * that, and round it back up to get the level 0 size. Clamp the
       * converted size between the original values, and the physical extent
       * of the base mipmap.
       *
       * On GFX10 we have to take care to not go over the physical extent
       * of the base mipmap as otherwise the GPU computes a different layout.
       * Note that the GPU does use the same base-mip dimensions for both a
       * block compatible format and the compressed format, so even if we take
       * the plain converted dimensions the physical layout is correct.
       */
      if (device->physical_device->rad_info.chip_class >= GFX9 &&
          vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
         /* If we have multiple levels in the view we should ideally take the last level,
          * but the mip calculation has a max(..., 1) so walking back to the base mip in a
          * useful way is hard. */
         if (iview->level_count > 1) {
            iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
            iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
         } else {
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width,
                                        iview->image->planes[0].surface.u.gfx9.base_mip_width);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height,
                                         iview->image->planes[0].surface.u.gfx9.base_mip_height);
         }
      }
   }

   iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);

   if (vk_format_get_plane_count(image->vk_format) > 1 &&
       iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
      plane_count = vk_format_get_plane_count(iview->vk_format);
   }

   bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
   bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
   }
}

void
radv_image_view_finish(struct radv_image_view *iview)
{
   vk_object_base_finish(&iview->base);
}

bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
                                VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
   switch (layout) {
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
   case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
      return radv_image_has_htile(image);
   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      return radv_image_is_tc_compat_htile(image) ||
             (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
   case VK_IMAGE_LAYOUT_GENERAL:
      /* It should be safe to enable TC-compat HTILE with
       * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
       * if the image doesn't have the storage bit set. This
       * improves performance for apps that use GENERAL for the main
       * depth pass because this allows compression and reduces the
       * number of decompressions from/to GENERAL.
       */
      /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
       * queue is likely broken for e.g. depth/stencil copies.
       */
      if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
          !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
         return true;
      } else {
         return false;
      }
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
      if (radv_image_is_tc_compat_htile(image) ||
          (radv_image_has_htile(image) &&
           !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
         /* Keep HTILE compressed if the image is only going to
          * be used as a depth/stencil read-only attachment.
          */
         return true;
      } else {
         return false;
      }
      break;
   default:
      return radv_image_is_tc_compat_htile(image);
   }
}

bool
radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
                           unsigned level, VkImageLayout layout, bool in_render_loop,
                           unsigned queue_mask)
{
   if (radv_dcc_enabled(image, level) &&
       !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
      return false;

   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return false;

   if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
      return false;

   /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
    * images can only be fast-cleared if comp-to-single is supported because we don't yet support
    * FCE on the compute queue.
    */
   return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
}

bool
radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
                           unsigned level, VkImageLayout layout, bool in_render_loop,
                           unsigned queue_mask)
{
   if (!radv_dcc_enabled(image, level))
      return false;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
       queue_mask & (1u << RADV_QUEUE_FOREIGN))
      return true;

   /* If the image is read-only, we can always just keep it compressed. */
   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return true;

   /* Don't compress compute transfer dst when image stores are not supported. */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   return device->physical_device->rad_info.chip_class >= GFX10 ||
          layout != VK_IMAGE_LAYOUT_GENERAL;
}

bool
radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
                             VkImageLayout layout, unsigned queue_mask)
{
   if (!radv_image_has_fmask(image))
      return false;

   /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
    * expanded beforehand.
    */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
      return false;

   /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
   return layout != VK_IMAGE_LAYOUT_GENERAL &&
          (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
}

unsigned
radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
   if (!image->exclusive)
      return image->queue_family_mask;
   if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
      return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
   if (family == VK_QUEUE_FAMILY_IGNORED)
      return 1u << queue_family;
   return 1u << family;
}
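
/* For illustration: for an exclusive image accessed with
 * family == VK_QUEUE_FAMILY_IGNORED from queue_family == RADV_QUEUE_GENERAL,
 * the mask is 1u << RADV_QUEUE_GENERAL. For an external/foreign family it is
 * all internal queue family bits plus the foreign bit, which is what gates
 * e.g. the foreign-queue DCC check in radv_layout_dcc_compressed() above.
 */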

VkResult
radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
{
#ifdef ANDROID
   const VkNativeBufferANDROID *gralloc_info =
      vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif

   const struct wsi_image_create_info *wsi_info =
      vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;

   return radv_image_create(device,
                            &(struct radv_image_create_info){
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                            },
                            pAllocator, pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, _image);

   if (!image)
      return;

   radv_destroy_image(device, pAllocator, image);
}

void
radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
                               const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->mipLevel;
   int layer = pSubresource->arrayLayer;

   unsigned plane_id = 0;
   if (vk_format_get_plane_count(image->vk_format) > 1)
      plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

      assert(level == 0);
      assert(layer == 0);

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    surface, mem_plane_id, 0);
      pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
                                                      surface, mem_plane_id);
      pLayout->arrayPitch = 0;
      pLayout->depthPitch = 0;
      pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.chip_class >= GFX9) {
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, layer) +
                        level_offset;
      if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
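         /* For illustration, with hypothetical numbers: if the hardware pitch
          * is 300 components and surface->bpe is 12 (R32G32B32), then
          * rowPitch = 300 * 12 / 3 = 1200 bytes, i.e. the component pitch
          * times 4 bytes per component. */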
         pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch =
            surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->rowPitch = pitch * surface->bpe;
      }

      pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->size = surface->u.gfx9.surf_slice_size;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   } else {
      pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                        (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   }
}

VkResult
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
                                            VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
   RADV_FROM_HANDLE(radv_image, image, _image);

   pProperties->drmFormatModifier = image->planes[0].surface.modifier;
   return VK_SUCCESS;
}

VkResult
radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator, VkImageView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_image_view *view;

   view =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (view == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_image_view_init(view, device, pCreateInfo, NULL);

   *pView = radv_image_view_to_handle(view);

   return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image_view, iview, _iview);

   if (!iview)
      return;

   radv_image_view_finish(iview);
   vk_free2(&device->vk.alloc, pAllocator, iview);
}

void
radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
                      const VkBufferViewCreateInfo *pCreateInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);

   view->bo = buffer->bo;
   view->range =
      pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
   view->vk_format = pCreateInfo->format;

   radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
                               view->state);
}

void
radv_buffer_view_finish(struct radv_buffer_view *view)
{
   vk_object_base_finish(&view->base);
}

VkResult
radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer_view *view;

   view =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!view)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_buffer_view_init(view, device, pCreateInfo);

   *pView = radv_buffer_view_to_handle(view);

   return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

   if (!view)
      return;

   radv_buffer_view_finish(view);
   vk_free2(&device->vk.alloc, pAllocator, view);
}