1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include "ac_drm_fourcc.h"
29 #include "util/debug.h"
30 #include "util/u_atomic.h"
31 #include "vulkan/util/vk_format.h"
32 #include "radv_debug.h"
33 #include "radv_private.h"
34 #include "radv_radeon_winsys.h"
35 #include "sid.h"
36 #include "vk_format.h"
37 #include "vk_util.h"
38
39 #include "gfx10_format_table.h"
40
/* Image usage flags under which the GPU may write to the image contents
 * (transfer destination, color/depth-stencil attachment, storage image).
 */
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
44
45 static unsigned
radv_choose_tiling(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)46 radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
47 VkFormat format)
48 {
49 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
50 assert(pCreateInfo->samples <= 1);
51 return RADEON_SURF_MODE_LINEAR_ALIGNED;
52 }
53
54 /* MSAA resources must be 2D tiled. */
55 if (pCreateInfo->samples > 1)
56 return RADEON_SURF_MODE_2D;
57
58 if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
59 device->physical_device->rad_info.chip_class <= GFX8) {
60 /* this causes hangs in some VK CTS tests on GFX9. */
61 /* Textures with a very small height are recommended to be linear. */
62 if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
63 /* Only very thin and long 2D textures should benefit from
64 * linear_aligned. */
65 (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
66 return RADEON_SURF_MODE_LINEAR_ALIGNED;
67 }
68
69 return RADEON_SURF_MODE_2D;
70 }
71
72 static bool
radv_use_tc_compat_htile_for_image(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)73 radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
74 VkFormat format)
75 {
76 /* TC-compat HTILE is only available for GFX8+. */
77 if (device->physical_device->rad_info.chip_class < GFX8)
78 return false;
79
80 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
81 return false;
82
83 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
84 return false;
85
86 /* Do not enable TC-compatible HTILE if the image isn't readable by a
87 * shader because no texture fetches will happen.
88 */
89 if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
90 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
91 return false;
92
93 if (device->physical_device->rad_info.chip_class < GFX9) {
94 /* TC-compat HTILE for MSAA depth/stencil images is broken
95 * on GFX8 because the tiling doesn't match.
96 */
97 if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
98 return false;
99
100 /* GFX9+ supports compression for both 32-bit and 16-bit depth
101 * surfaces, while GFX8 only supports 32-bit natively. Though,
102 * the driver allows TC-compat HTILE for 16-bit depth surfaces
103 * with no Z planes compression.
104 */
105 if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
106 format != VK_FORMAT_D16_UNORM)
107 return false;
108 }
109
110 return true;
111 }
112
113 static bool
radv_surface_has_scanout(struct radv_device * device,const struct radv_image_create_info * info)114 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
115 {
116 if (info->bo_metadata) {
117 if (device->physical_device->rad_info.chip_class >= GFX9)
118 return info->bo_metadata->u.gfx9.scanout;
119 else
120 return info->bo_metadata->u.legacy.scanout;
121 }
122
123 return info->scanout;
124 }
125
126 static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device * device,const struct radv_image * image)127 radv_image_use_fast_clear_for_image_early(const struct radv_device *device,
128 const struct radv_image *image)
129 {
130 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
131 return true;
132
133 if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
134 /* Do not enable CMASK or DCC for small surfaces where the cost
135 * of the eliminate pass can be higher than the benefit of fast
136 * clear. RadeonSI does this, but the image threshold is
137 * different.
138 */
139 return false;
140 }
141
142 return !!(image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
143 }
144
145 static bool
radv_image_use_fast_clear_for_image(const struct radv_device * device,const struct radv_image * image)146 radv_image_use_fast_clear_for_image(const struct radv_device *device,
147 const struct radv_image *image)
148 {
149 if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
150 return true;
151
152 return radv_image_use_fast_clear_for_image_early(device, image) &&
153 (image->exclusive ||
154 /* Enable DCC for concurrent images if stores are
155 * supported because that means we can keep DCC compressed on
156 * all layouts/queues.
157 */
158 radv_image_use_dcc_image_stores(device, image));
159 }
160
161 bool
radv_are_formats_dcc_compatible(const struct radv_physical_device * pdev,const void * pNext,VkFormat format,VkImageCreateFlags flags,bool * sign_reinterpret)162 radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
163 VkFormat format, VkImageCreateFlags flags, bool *sign_reinterpret)
164 {
165 bool blendable;
166
167 if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
168 return false;
169
170 if (sign_reinterpret != NULL)
171 *sign_reinterpret = false;
172
173 if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
174 const struct VkImageFormatListCreateInfo *format_list =
175 (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
176 pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
177
178 /* We have to ignore the existence of the list if viewFormatCount = 0 */
179 if (format_list && format_list->viewFormatCount) {
180 /* compatibility is transitive, so we only need to check
181 * one format with everything else. */
182 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
183 if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
184 continue;
185
186 if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i],
187 sign_reinterpret))
188 return false;
189 }
190 } else {
191 return false;
192 }
193 }
194
195 return true;
196 }
197
198 static bool
radv_format_is_atomic_allowed(struct radv_device * device,VkFormat format)199 radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
200 {
201 if (format == VK_FORMAT_R32_SFLOAT && !device->image_float32_atomics)
202 return false;
203
204 return radv_is_atomic_format_supported(format);
205 }
206
207 static bool
radv_formats_is_atomic_allowed(struct radv_device * device,const void * pNext,VkFormat format,VkImageCreateFlags flags)208 radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format,
209 VkImageCreateFlags flags)
210 {
211 if (radv_format_is_atomic_allowed(device, format))
212 return true;
213
214 if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
215 const struct VkImageFormatListCreateInfo *format_list =
216 (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
217 pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
218
219 /* We have to ignore the existence of the list if viewFormatCount = 0 */
220 if (format_list && format_list->viewFormatCount) {
221 for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
222 if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
223 return true;
224 }
225 }
226 }
227
228 return false;
229 }
230
231 static bool
radv_use_dcc_for_image_early(struct radv_device * device,struct radv_image * image,const VkImageCreateInfo * pCreateInfo,VkFormat format,bool * sign_reinterpret)232 radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image,
233 const VkImageCreateInfo *pCreateInfo, VkFormat format,
234 bool *sign_reinterpret)
235 {
236 /* DCC (Delta Color Compression) is only available for GFX8+. */
237 if (device->physical_device->rad_info.chip_class < GFX8)
238 return false;
239
240 if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
241 return false;
242
243 if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
244 return false;
245
246 /*
247 * TODO: Enable DCC for storage images on GFX9 and earlier.
248 *
249 * Also disable DCC with atomics because even when DCC stores are
250 * supported atomics will always decompress. So if we are
251 * decompressing a lot anyway we might as well not have DCC.
252 */
253 if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
254 (device->physical_device->rad_info.chip_class < GFX10 ||
255 radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
256 return false;
257
258 /* Do not enable DCC for fragment shading rate attachments. */
259 if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
260 return false;
261
262 if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
263 return false;
264
265 if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
266 return false;
267
268 if (!radv_image_use_fast_clear_for_image_early(device, image) &&
269 image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
270 return false;
271
272 /* Do not enable DCC for mipmapped arrays because performance is worse. */
273 if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
274 return false;
275
276 if (device->physical_device->rad_info.chip_class < GFX10) {
277 /* TODO: Add support for DCC MSAA on GFX8-9. */
278 if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
279 return false;
280
281 /* TODO: Add support for DCC layers/mipmaps on GFX9. */
282 if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
283 device->physical_device->rad_info.chip_class == GFX9)
284 return false;
285 }
286
287 return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
288 pCreateInfo->flags, sign_reinterpret);
289 }
290
291 static bool
radv_use_dcc_for_image_late(struct radv_device * device,struct radv_image * image)292 radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
293 {
294 if (!radv_image_has_dcc(image))
295 return false;
296
297 if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
298 return true;
299
300 if (!radv_image_use_fast_clear_for_image(device, image))
301 return false;
302
303 /* TODO: Fix storage images with DCC without DCC image stores.
304 * Disabling it for now. */
305 if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
306 return false;
307
308 return true;
309 }
310
311 /*
312 * Whether to enable image stores with DCC compression for this image. If
313 * this function returns false the image subresource should be decompressed
314 * before using it with image stores.
315 *
316 * Note that this can have mixed performance implications, see
317 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
318 *
319 * This function assumes the image uses DCC compression.
320 */
321 bool
radv_image_use_dcc_image_stores(const struct radv_device * device,const struct radv_image * image)322 radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
323 {
324 return ac_surface_supports_dcc_image_stores(device->physical_device->rad_info.chip_class,
325 &image->planes[0].surface);
326 }
327
328 /*
329 * Whether to use a predicate to determine whether DCC is in a compressed
330 * state. This can be used to avoid decompressing an image multiple times.
331 */
332 bool
radv_image_use_dcc_predication(const struct radv_device * device,const struct radv_image * image)333 radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
334 {
335 return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
336 }
337
338 static inline bool
radv_use_fmask_for_image(const struct radv_device * device,const struct radv_image * image)339 radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
340 {
341 return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
342 (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
343 }
344
345 static inline bool
radv_use_htile_for_image(const struct radv_device * device,const struct radv_image * image)346 radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
347 {
348 /* TODO:
349 * - Investigate about mips+layers.
350 * - Enable on other gens.
351 */
352 bool use_htile_for_mips =
353 image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;
354
355 /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
356 if (device->physical_device->rad_info.chip_class == GFX10 &&
357 image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->info.levels > 1)
358 return false;
359
360 /* Do not enable HTILE for very small images because it seems less performant but make sure it's
361 * allowed with VRS attachments because we need HTILE.
362 */
363 if (image->info.width * image->info.height < 8 * 8 &&
364 !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
365 !device->attachment_vrs_enabled)
366 return false;
367
368 if (device->instance->disable_htile_layers && image->info.array_size > 1)
369 return false;
370
371 return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
372 }
373
374 static bool
radv_use_tc_compat_cmask_for_image(struct radv_device * device,struct radv_image * image)375 radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
376 {
377 /* TC-compat CMASK is only available for GFX8+. */
378 if (device->physical_device->rad_info.chip_class < GFX8)
379 return false;
380
381 if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
382 return false;
383
384 /* TC-compat CMASK with storage images is supported on GFX10+. */
385 if ((image->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
386 device->physical_device->rad_info.chip_class < GFX10)
387 return false;
388
389 /* Do not enable TC-compatible if the image isn't readable by a shader
390 * because no texture fetches will happen.
391 */
392 if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
393 VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
394 return false;
395
396 /* If the image doesn't have FMASK, it can't be fetchable. */
397 if (!radv_image_has_fmask(image))
398 return false;
399
400 return true;
401 }
402
403 static uint32_t
si_get_bo_metadata_word1(const struct radv_device * device)404 si_get_bo_metadata_word1(const struct radv_device *device)
405 {
406 return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
407 }
408
409 static bool
radv_is_valid_opaque_metadata(const struct radv_device * device,const struct radeon_bo_metadata * md)410 radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
411 {
412 if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
413 return false;
414
415 if (md->size_metadata < 40)
416 return false;
417
418 return true;
419 }
420
421 static void
radv_patch_surface_from_metadata(struct radv_device * device,struct radeon_surf * surface,const struct radeon_bo_metadata * md)422 radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
423 const struct radeon_bo_metadata *md)
424 {
425 surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
426
427 if (device->physical_device->rad_info.chip_class >= GFX9) {
428 if (md->u.gfx9.swizzle_mode > 0)
429 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
430 else
431 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
432
433 surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
434 } else {
435 surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
436 surface->u.legacy.bankw = md->u.legacy.bankw;
437 surface->u.legacy.bankh = md->u.legacy.bankh;
438 surface->u.legacy.tile_split = md->u.legacy.tile_split;
439 surface->u.legacy.mtilea = md->u.legacy.mtilea;
440 surface->u.legacy.num_banks = md->u.legacy.num_banks;
441
442 if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
443 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
444 else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
445 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
446 else
447 surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
448 }
449 }
450
/* Reconcile the image dimensions requested by the application with the
 * dimensions recorded in imported BO metadata. May enlarge *image_info to
 * match the external allocation, or reject the import entirely.
 *
 * Returns VK_SUCCESS, or VK_ERROR_INVALID_EXTERNAL_HANDLE when the
 * external allocation is unusable (smaller than requested, or any
 * mismatch on GFX10+).
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images which is going to result in
    * weird strides and other properties. Lets be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      /* Decode width/height from the stored image descriptor words; the
       * bitfield layout differs between GFX10+ and older generations.
       */
      if (device->physical_device->rad_info.chip_class >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   /* External and internal dimensions agree: nothing to patch. */
   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      /* The external allocation is too small to back this image. */
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      /* Larger external allocation, but GFX10 cannot express the stride
       * mismatch, so reject.
       */
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      /* Pre-GFX10: tolerate the mismatch, but warn that the same import
       * would fail on GFX10.
       */
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   /* Adopt the (larger) external dimensions for surface computation. */
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}
511
512 static VkResult
radv_patch_image_from_extra_info(struct radv_device * device,struct radv_image * image,const struct radv_image_create_info * create_info,struct ac_surf_info * image_info)513 radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
514 const struct radv_image_create_info *create_info,
515 struct ac_surf_info *image_info)
516 {
517 VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
518 if (result != VK_SUCCESS)
519 return result;
520
521 for (unsigned plane = 0; plane < image->plane_count; ++plane) {
522 if (create_info->bo_metadata) {
523 radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
524 create_info->bo_metadata);
525 }
526
527 if (radv_surface_has_scanout(device, create_info)) {
528 image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
529 if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
530 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
531
532 image->info.surf_index = NULL;
533 }
534
535 if (create_info->prime_blit_src && device->physical_device->rad_info.chip_class == GFX9) {
536 /* Older SDMA hw can't handle DCC */
537 image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
538 }
539 }
540 return VK_SUCCESS;
541 }
542
/* Map an ETC2/EAC compressed format to the uncompressed format used to
 * emulate it on hardware without native ETC2 support.
 */
static VkFormat
etc2_emulation_format(VkFormat format)
{
   switch (format) {
   /* RGB(A) ETC2 variants decompress into 8-bit RGBA. */
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
      return VK_FORMAT_R8G8B8A8_UNORM;
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
      return VK_FORMAT_R8G8B8A8_SRGB;
   /* Single/dual-channel EAC variants decompress into 16-bit normalized. */
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
      return VK_FORMAT_R16_UNORM;
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
      return VK_FORMAT_R16_SNORM;
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
      return VK_FORMAT_R16G16_UNORM;
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
      return VK_FORMAT_R16G16_SNORM;
   default:
      unreachable("Unhandled ETC format");
   }
}
567
568 static VkFormat
radv_image_get_plane_format(const struct radv_physical_device * pdev,const struct radv_image * image,unsigned plane)569 radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image,
570 unsigned plane)
571 {
572 if (pdev->emulate_etc2 &&
573 vk_format_description(image->vk_format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
574 if (plane == 0)
575 return image->vk_format;
576 return etc2_emulation_format(image->vk_format);
577 }
578 return vk_format_get_plane_format(image->vk_format, plane);
579 }
580
581 static uint64_t
radv_get_surface_flags(struct radv_device * device,struct radv_image * image,unsigned plane_id,const VkImageCreateInfo * pCreateInfo,VkFormat image_format)582 radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
583 const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
584 {
585 uint64_t flags;
586 unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
587 VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
588 const struct util_format_description *desc = vk_format_description(format);
589 bool is_depth, is_stencil;
590
591 is_depth = util_format_has_depth(desc);
592 is_stencil = util_format_has_stencil(desc);
593
594 flags = RADEON_SURF_SET(array_mode, MODE);
595
596 switch (pCreateInfo->imageType) {
597 case VK_IMAGE_TYPE_1D:
598 if (pCreateInfo->arrayLayers > 1)
599 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
600 else
601 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
602 break;
603 case VK_IMAGE_TYPE_2D:
604 if (pCreateInfo->arrayLayers > 1)
605 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
606 else
607 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
608 break;
609 case VK_IMAGE_TYPE_3D:
610 flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
611 break;
612 default:
613 unreachable("unhandled image type");
614 }
615
616 /* Required for clearing/initializing a specific layer on GFX8. */
617 flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
618
619 if (is_depth) {
620 flags |= RADEON_SURF_ZBUFFER;
621
622 if (radv_use_htile_for_image(device, image) &&
623 !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
624 if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
625 flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
626 } else {
627 flags |= RADEON_SURF_NO_HTILE;
628 }
629 }
630
631 if (is_stencil)
632 flags |= RADEON_SURF_SBUFFER;
633
634 if (device->physical_device->rad_info.chip_class >= GFX9 &&
635 pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
636 vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
637 flags |= RADEON_SURF_NO_RENDER_TARGET;
638
639 if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format,
640 &image->dcc_sign_reinterpret))
641 flags |= RADEON_SURF_DISABLE_DCC;
642
643 if (!radv_use_fmask_for_image(device, image))
644 flags |= RADEON_SURF_NO_FMASK;
645
646 if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
647 flags |=
648 RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
649 }
650
651 return flags;
652 }
653
654 static inline unsigned
si_tile_mode_index(const struct radv_image_plane * plane,unsigned level,bool stencil)655 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
656 {
657 if (stencil)
658 return plane->surface.u.legacy.zs.stencil_tiling_index[level];
659 else
660 return plane->surface.u.legacy.tiling_index[level];
661 }
662
663 static unsigned
radv_map_swizzle(unsigned swizzle)664 radv_map_swizzle(unsigned swizzle)
665 {
666 switch (swizzle) {
667 case PIPE_SWIZZLE_Y:
668 return V_008F0C_SQ_SEL_Y;
669 case PIPE_SWIZZLE_Z:
670 return V_008F0C_SQ_SEL_Z;
671 case PIPE_SWIZZLE_W:
672 return V_008F0C_SQ_SEL_W;
673 case PIPE_SWIZZLE_0:
674 return V_008F0C_SQ_SEL_0;
675 case PIPE_SWIZZLE_1:
676 return V_008F0C_SQ_SEL_1;
677 default: /* PIPE_SWIZZLE_X */
678 return V_008F0C_SQ_SEL_X;
679 }
680 }
681
682 static void
radv_compose_swizzle(const struct util_format_description * desc,const VkComponentMapping * mapping,enum pipe_swizzle swizzle[4])683 radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
684 enum pipe_swizzle swizzle[4])
685 {
686 if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
687 /* 64-bit formats only support storage images and storage images
688 * require identity component mappings. We use 32-bit
689 * instructions to access 64-bit images, so we need a special
690 * case here.
691 *
692 * The zw components are 1,0 so that they can be easily be used
693 * by loads to create the w component, which has to be 0 for
694 * NULL descriptors.
695 */
696 swizzle[0] = PIPE_SWIZZLE_X;
697 swizzle[1] = PIPE_SWIZZLE_Y;
698 swizzle[2] = PIPE_SWIZZLE_1;
699 swizzle[3] = PIPE_SWIZZLE_0;
700 } else if (!mapping) {
701 for (unsigned i = 0; i < 4; i++)
702 swizzle[i] = desc->swizzle[i];
703 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
704 const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
705 PIPE_SWIZZLE_1};
706 vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
707 } else {
708 vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
709 }
710 }
711
/* Fill the 4-dword typed buffer descriptor for a buffer view.
 *
 * device:    device providing the chip class used to pick the encoding
 * buffer:    backing buffer (VA taken from its BO plus buffer->offset)
 * vk_format: element format of the view
 * offset:    byte offset of the view into the buffer
 * range:     size of the view in bytes
 * state:     output, at least 4 dwords
 */
static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   /* Element stride in bytes, derived from the format's block size. */
   stride = desc->block.bits / 8;

   /* No component mapping for buffer views: use the format's own swizzle. */
   radv_compose_swizzle(desc, NULL, swizzle);

   va += offset;
   /* Dwords 0-1: 48-bit base address plus the element stride. */
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

   /* On everything except GFX8, NUM_RECORDS is expressed in stride-sized
    * elements rather than bytes.
    */
   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   /* Dword 3: destination component selects from the composed swizzle. */
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      /* GFX10+ uses a single combined format field from the format table. */
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      /* Pre-GFX10 splits the format into separate NUM_FORMAT/DATA_FORMAT. */
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}
766
/* Patch the "mutable" fields of an already-built texture descriptor in place:
 * the base address, tiling/swizzle mode and metadata (DCC/HTILE) compression
 * state. The immutable format/swizzle dwords written by the
 * *_make_texture_descriptor() helpers are preserved; only the relevant bits
 * are cleared via the C_* masks before being re-set.
 *
 * base_level_info is only consulted on GFX6-GFX8 (legacy surfaces); on GFX9+
 * per-level offsets live inside the surface itself.
 * is_stencil selects the stencil sub-surface of a depth/stencil image.
 * disable_compression forces DCC/HTILE metadata off for this view;
 * enable_write_compression additionally allows compressed writes from
 * storage images (GFX10+ WRITE_COMPRESS_ENABLE).
 */
static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   /* image->bo may still be NULL (no memory bound yet); descriptor gets VA 0. */
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;
   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      /* GFX6-8: per-mip offset stored in units of 256 bytes. */
      va += (uint64_t)base_level_info->offset_256B * 256;

   state[0] = va >> 8;
   if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (chip_class <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

         /* Fold the tile swizzle into the low bits of the DCC address; it must
          * not exceed the DCC alignment. */
         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         /* GFX10+ stores the metadata address differently (see below). */
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         /* Z/S surfaces are always RB/pipe-aligned; color surfaces take the
          * alignment computed for their DCC. */
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         /* Same aliasing trick as above: Z/S is fully aligned, color uses the
          * DCC alignment flags. */
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}
881
882 static unsigned
radv_tex_dim(VkImageType image_type,VkImageViewType view_type,unsigned nr_layers,unsigned nr_samples,bool is_storage_image,bool gfx9)883 radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
884 unsigned nr_samples, bool is_storage_image, bool gfx9)
885 {
886 if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
887 return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
888
889 /* GFX9 allocates 1D textures as 2D. */
890 if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
891 image_type = VK_IMAGE_TYPE_2D;
892 switch (image_type) {
893 case VK_IMAGE_TYPE_1D:
894 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
895 case VK_IMAGE_TYPE_2D:
896 if (nr_samples > 1)
897 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
898 else
899 return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
900 case VK_IMAGE_TYPE_3D:
901 if (view_type == VK_IMAGE_VIEW_TYPE_3D)
902 return V_008F1C_SQ_RSRC_IMG_3D;
903 else
904 return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
905 default:
906 unreachable("illegal image type");
907 }
908 }
909
910 static unsigned
gfx9_border_color_swizzle(const struct util_format_description * desc)911 gfx9_border_color_swizzle(const struct util_format_description *desc)
912 {
913 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
914
915 if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
916 /* For the pre-defined border color values (white, opaque
917 * black, transparent black), the only thing that matters is
918 * that the alpha channel winds up in the correct place
919 * (because the RGB channels are all the same) so either of
920 * these enumerations will work.
921 */
922 if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
923 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
924 else
925 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
926 } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
927 if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
928 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
929 else
930 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
931 } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
932 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
933 } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
934 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
935 }
936
937 return bc_swizzle;
938 }
939
940 bool
vi_alpha_is_on_msb(struct radv_device * device,VkFormat format)941 vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
942 {
943 const struct util_format_description *desc = vk_format_description(format);
944
945 if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
946 return desc->swizzle[3] == PIPE_SWIZZLE_X;
947
948 return radv_translate_colorswap(format, false) <= 1;
949 }
950 /**
951 * Build the sampler view descriptor for a texture (GFX10).
952 */
953 static void
gfx10_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,float min_lod,uint32_t * state,uint32_t * fmask_state)954 gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
955 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
956 const VkComponentMapping *mapping, unsigned first_level,
957 unsigned last_level, unsigned first_layer, unsigned last_layer,
958 unsigned width, unsigned height, unsigned depth, float min_lod,
959 uint32_t *state, uint32_t *fmask_state)
960 {
961 const struct util_format_description *desc;
962 enum pipe_swizzle swizzle[4];
963 unsigned img_format;
964 unsigned type;
965
966 desc = vk_format_description(vk_format);
967
968 /* For emulated ETC2 without alpha we need to override the format to a 3-componenent format, so
969 * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
970 * this uses the Gallium formats to set the description. */
971 if (image->vk_format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
972 vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
973 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
974 } else if (image->vk_format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
975 vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
976 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
977 }
978
979 img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
980
981 radv_compose_swizzle(desc, mapping, swizzle);
982
983 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
984 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
985 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
986 height = 1;
987 depth = image->info.array_size;
988 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
989 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
990 depth = image->info.array_size;
991 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
992 depth = image->info.array_size / 6;
993
994 state[0] = 0;
995 state[1] = S_00A004_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) |
996 S_00A004_FORMAT(img_format) |
997 S_00A004_WIDTH_LO(width - 1);
998 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
999 S_00A008_RESOURCE_LEVEL(1);
1000 state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
1001 S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
1002 S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
1003 S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
1004 S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
1005 S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
1006 : last_level) |
1007 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
1008 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
1009 * to know the total number of layers.
1010 */
1011 state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
1012 S_00A010_BASE_ARRAY(first_layer);
1013 state[5] = S_00A014_ARRAY_PITCH(0) |
1014 S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
1015 : image->info.levels - 1) |
1016 S_00A014_PERF_MOD(4);
1017 state[6] = 0;
1018 state[7] = 0;
1019
1020 if (radv_dcc_enabled(image, first_level)) {
1021 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
1022 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
1023 image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
1024 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1025 }
1026
1027 if (radv_image_get_iterate256(device, image)) {
1028 state[6] |= S_00A018_ITERATE_256(1);
1029 }
1030
1031 /* Initialize the sampler view for FMASK. */
1032 if (fmask_state) {
1033 if (radv_image_has_fmask(image)) {
1034 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1035 uint32_t format;
1036 uint64_t va;
1037
1038 assert(image->plane_count == 1);
1039
1040 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1041
1042 switch (image->info.samples) {
1043 case 2:
1044 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
1045 break;
1046 case 4:
1047 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
1048 break;
1049 case 8:
1050 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
1051 break;
1052 default:
1053 unreachable("invalid nr_samples");
1054 }
1055
1056 fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
1057 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
1058 S_00A004_WIDTH_LO(width - 1);
1059 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
1060 S_00A008_RESOURCE_LEVEL(1);
1061 fmask_state[3] =
1062 S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1063 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1064 S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
1065 S_00A00C_TYPE(
1066 radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1067 fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
1068 fmask_state[5] = 0;
1069 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
1070 fmask_state[7] = 0;
1071
1072 if (radv_image_is_tc_compat_cmask(image)) {
1073 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1074
1075 fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
1076 fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
1077 fmask_state[7] |= va >> 16;
1078 }
1079 } else
1080 memset(fmask_state, 0, 8 * 4);
1081 }
1082 }
1083
1084 /**
1085 * Build the sampler view descriptor for a texture (SI-GFX9)
1086 */
1087 static void
si_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,float min_lod,uint32_t * state,uint32_t * fmask_state)1088 si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
1089 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
1090 const VkComponentMapping *mapping, unsigned first_level,
1091 unsigned last_level, unsigned first_layer, unsigned last_layer,
1092 unsigned width, unsigned height, unsigned depth, float min_lod,
1093 uint32_t *state, uint32_t *fmask_state)
1094 {
1095 const struct util_format_description *desc;
1096 enum pipe_swizzle swizzle[4];
1097 int first_non_void;
1098 unsigned num_format, data_format, type;
1099
1100 desc = vk_format_description(vk_format);
1101
1102 /* For emulated ETC2 without alpha we need to override the format to a 3-componenent format, so
1103 * that border colors work correctly (alpha forced to 1). Since Vulkan has no such format,
1104 * this uses the Gallium formats to set the description. */
1105 if (image->vk_format == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK &&
1106 vk_format == VK_FORMAT_R8G8B8A8_UNORM) {
1107 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_UNORM);
1108 } else if (image->vk_format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK &&
1109 vk_format == VK_FORMAT_R8G8B8A8_SRGB) {
1110 desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
1111 }
1112
1113 radv_compose_swizzle(desc, mapping, swizzle);
1114
1115 first_non_void = vk_format_get_first_non_void_channel(vk_format);
1116
1117 num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
1118 if (num_format == ~0) {
1119 num_format = 0;
1120 }
1121
1122 data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
1123 if (data_format == ~0) {
1124 data_format = 0;
1125 }
1126
1127 /* S8 with either Z16 or Z32 HTILE need a special format. */
1128 if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
1129 radv_image_is_tc_compat_htile(image)) {
1130 if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
1131 data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
1132 else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
1133 data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
1134 }
1135 type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
1136 is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
1137 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
1138 height = 1;
1139 depth = image->info.array_size;
1140 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
1141 if (view_type != VK_IMAGE_VIEW_TYPE_3D)
1142 depth = image->info.array_size;
1143 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
1144 depth = image->info.array_size / 6;
1145
1146 state[0] = 0;
1147 state[1] = (S_008F14_MIN_LOD(radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8)) |
1148 S_008F14_DATA_FORMAT(data_format) |
1149 S_008F14_NUM_FORMAT(num_format));
1150 state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
1151 state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
1152 S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
1153 S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
1154 S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
1155 S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
1156 S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
1157 : last_level) |
1158 S_008F1C_TYPE(type));
1159 state[4] = 0;
1160 state[5] = S_008F24_BASE_ARRAY(first_layer);
1161 state[6] = 0;
1162 state[7] = 0;
1163
1164 if (device->physical_device->rad_info.chip_class == GFX9) {
1165 unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
1166
1167 /* Depth is the last accessible layer on Gfx9.
1168 * The hw doesn't need to know the total number of layers.
1169 */
1170 if (type == V_008F1C_SQ_RSRC_IMG_3D)
1171 state[4] |= S_008F20_DEPTH(depth - 1);
1172 else
1173 state[4] |= S_008F20_DEPTH(last_layer);
1174
1175 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
1176 state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
1177 : image->info.levels - 1);
1178 } else {
1179 state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
1180 state[4] |= S_008F20_DEPTH(depth - 1);
1181 state[5] |= S_008F24_LAST_ARRAY(last_layer);
1182 }
1183 if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
1184 image->planes[0].surface.meta_offset) {
1185 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
1186 } else {
1187 if (device->instance->disable_aniso_single_level) {
1188 /* The last dword is unused by hw. The shader uses it to clear
1189 * bits in the first dword of sampler state.
1190 */
1191 if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
1192 if (first_level == last_level)
1193 state[7] = C_008F30_MAX_ANISO_RATIO;
1194 else
1195 state[7] = 0xffffffff;
1196 }
1197 }
1198 }
1199
1200 /* Initialize the sampler view for FMASK. */
1201 if (fmask_state) {
1202 if (radv_image_has_fmask(image)) {
1203 uint32_t fmask_format;
1204 uint64_t gpu_address = radv_buffer_get_va(image->bo);
1205 uint64_t va;
1206
1207 assert(image->plane_count == 1);
1208
1209 va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
1210
1211 if (device->physical_device->rad_info.chip_class == GFX9) {
1212 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
1213 switch (image->info.samples) {
1214 case 2:
1215 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
1216 break;
1217 case 4:
1218 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
1219 break;
1220 case 8:
1221 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
1222 break;
1223 default:
1224 unreachable("invalid nr_samples");
1225 }
1226 } else {
1227 switch (image->info.samples) {
1228 case 2:
1229 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
1230 break;
1231 case 4:
1232 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
1233 break;
1234 case 8:
1235 fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
1236 break;
1237 default:
1238 assert(0);
1239 fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
1240 }
1241 num_format = V_008F14_IMG_NUM_FORMAT_UINT;
1242 }
1243
1244 fmask_state[0] = va >> 8;
1245 fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
1246 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
1247 S_008F14_NUM_FORMAT(num_format);
1248 fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
1249 fmask_state[3] =
1250 S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
1251 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
1252 S_008F1C_TYPE(
1253 radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
1254 fmask_state[4] = 0;
1255 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
1256 fmask_state[6] = 0;
1257 fmask_state[7] = 0;
1258
1259 if (device->physical_device->rad_info.chip_class == GFX9) {
1260 fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
1261 fmask_state[4] |= S_008F20_DEPTH(last_layer) |
1262 S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
1263 fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);
1264
1265 if (radv_image_is_tc_compat_cmask(image)) {
1266 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1267
1268 fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
1269 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1270 fmask_state[7] |= va >> 8;
1271 }
1272 } else {
1273 fmask_state[3] |=
1274 S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
1275 fmask_state[4] |=
1276 S_008F20_DEPTH(depth - 1) |
1277 S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
1278 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
1279
1280 if (radv_image_is_tc_compat_cmask(image)) {
1281 va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
1282
1283 fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
1284 fmask_state[7] |= va >> 8;
1285 }
1286 }
1287 } else
1288 memset(fmask_state, 0, 8 * 4);
1289 }
1290 }
1291
1292 static void
radv_make_texture_descriptor(struct radv_device * device,struct radv_image * image,bool is_storage_image,VkImageViewType view_type,VkFormat vk_format,const VkComponentMapping * mapping,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned width,unsigned height,unsigned depth,float min_lod,uint32_t * state,uint32_t * fmask_state)1293 radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
1294 bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
1295 const VkComponentMapping *mapping, unsigned first_level,
1296 unsigned last_level, unsigned first_layer, unsigned last_layer,
1297 unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
1298 uint32_t *fmask_state)
1299 {
1300 if (device->physical_device->rad_info.chip_class >= GFX10) {
1301 gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
1302 first_level, last_level, first_layer, last_layer, width, height,
1303 depth, min_lod, state, fmask_state);
1304 } else {
1305 si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
1306 first_level, last_level, first_layer, last_layer, width, height,
1307 depth, min_lod, state, fmask_state);
1308 }
1309 }
1310
/* Produce the opaque UMD metadata blob for an exported image by building a
 * full-range texture descriptor (all levels/layers, identity swizzle) and
 * handing it to ac_surface_get_umd_metadata().
 */
static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   /* Zero-initialized static => identity component mapping. */
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   /* Only single-plane images can be exported this way. */
   assert(image->plane_count == 1);

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
                                image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, 0.0f, desc, NULL);

   /* Patch addresses/tiling for plane 0, base level 0, no compression overrides. */
   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}
1332
/* Fill in the radeon_bo_metadata for plane 0 of an image so it can be shared
 * with other processes/drivers: tiling parameters (per generation) plus the
 * opaque UMD metadata blob.
 */
void
radv_init_metadata(struct radv_device *device, struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* Prefer the displayable DCC surface when one exists. */
      uint64_t dcc_offset =
         image->offset +
         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size =
         surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      /* GFX6-8 legacy tiling description, derived from level 0. */
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}
1371
/* Override the offset and stride of plane 0, used when importing an image
 * whose layout was decided externally (thin wrapper around
 * ac_surface_override_offset_stride()).
 */
void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}
1379
/* Append a CMASK buffer to the surface of an eligible single-sample image so
 * it can use CMASK fast clears. Bails out when CMASK is absent or already
 * placed, or when the image cannot benefit (DCC present, mipmapped, 3D,
 * sparse, fast clears disabled, or bpe > 8).
 */
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
                                     const struct radv_image *image, struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
       image->info.depth > 1 || radv_image_has_dcc(image) ||
       !radv_image_use_fast_clear_for_image(device, image) ||
       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->info.storage_samples == 1);

   /* Place CMASK after the current surface data, respecting its alignment. */
   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}
1396
/* Reserve space at the end of the image for the small per-level metadata
 * values the driver maintains on the GPU: FCE/DCC predicates, fast clear
 * values, and the TC-compat zrange workaround words. Updates image->size.
 */
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* images with modifiers can be potentially imported */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   /* 8 bytes (one predicate) per mip level for fast-clear-eliminate state. */
   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   /* 8 bytes per mip level for DCC decompression predication. */
   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   /* 8 bytes per mip level for the stored fast clear color/depth value. */
   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) ||
       radv_image_has_cmask(image) || radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * have to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}
1430
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2 (GFX10/GFX10.3 only; see the caller
 * radv_image_is_l2_coherent()).
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   /* Check every plane; one misaligned plane makes the whole image misaligned. */
   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* GFX10 quirk: deep depth arrays are treated as 4 bpp. */
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
1481
1482 static bool
radv_image_is_l2_coherent(const struct radv_device * device,const struct radv_image * image)1483 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
1484 {
1485 if (device->physical_device->rad_info.chip_class >= GFX10) {
1486 return !device->physical_device->rad_info.tcc_rb_non_coherent &&
1487 !radv_image_is_pipe_misaligned(device, image);
1488 } else if (device->physical_device->rad_info.chip_class == GFX9) {
1489 if (image->info.samples == 1 &&
1490 (image->usage &
1491 (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1492 !vk_format_has_stencil(image->vk_format)) {
1493 /* Single-sample color and single-sample depth
1494 * (not stencil) are coherent with shaders on
1495 * GFX9.
1496 */
1497 return true;
1498 }
1499 }
1500
1501 return false;
1502 }
1503
1504 /**
1505 * Determine if the given image can be fast cleared.
1506 */
1507 static bool
radv_image_can_fast_clear(const struct radv_device * device,const struct radv_image * image)1508 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
1509 {
1510 if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
1511 return false;
1512
1513 if (vk_format_is_color(image->vk_format)) {
1514 if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
1515 return false;
1516
1517 /* RB+ doesn't work with CMASK fast clear on Stoney. */
1518 if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
1519 return false;
1520 } else {
1521 if (!radv_image_has_htile(image))
1522 return false;
1523 }
1524
1525 /* Do not fast clears 3D images. */
1526 if (image->type == VK_IMAGE_TYPE_3D)
1527 return false;
1528
1529 return true;
1530 }
1531
1532 /**
1533 * Determine if the given image can be fast cleared using comp-to-single.
1534 */
1535 static bool
radv_image_use_comp_to_single(const struct radv_device * device,const struct radv_image * image)1536 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
1537 {
1538 /* comp-to-single is only available for GFX10+. */
1539 if (device->physical_device->rad_info.chip_class < GFX10)
1540 return false;
1541
1542 /* If the image can't be fast cleared, comp-to-single can't be used. */
1543 if (!radv_image_can_fast_clear(device, image))
1544 return false;
1545
1546 /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
1547 if (!radv_image_has_dcc(image))
1548 return false;
1549
1550 /* It seems 8bpp and 16bpp require RB+ to work. */
1551 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk_format);
1552 if (bytes_per_pixel <= 2 && !device->physical_device->rad_info.rbplus_allowed)
1553 return false;
1554
1555 return true;
1556 }
1557
1558 static unsigned
radv_get_internal_plane_count(const struct radv_physical_device * pdev,VkFormat fmt)1559 radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
1560 {
1561 if (pdev->emulate_etc2 && vk_format_description(fmt)->layout == UTIL_FORMAT_LAYOUT_ETC)
1562 return 2;
1563 return vk_format_get_plane_count(fmt);
1564 }
1565
1566 static void
radv_image_reset_layout(const struct radv_physical_device * pdev,struct radv_image * image)1567 radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
1568 {
1569 image->size = 0;
1570 image->alignment = 1;
1571
1572 image->tc_compatible_cmask = 0;
1573 image->fce_pred_offset = image->dcc_pred_offset = 0;
1574 image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1575
1576 unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk_format);
1577 for (unsigned i = 0; i < plane_count; ++i) {
1578 VkFormat format = radv_image_get_plane_format(pdev, image, i);
1579 if (vk_format_has_depth(format))
1580 format = vk_format_depth_only(format);
1581
1582 uint64_t flags = image->planes[i].surface.flags;
1583 uint64_t modifier = image->planes[i].surface.modifier;
1584 memset(image->planes + i, 0, sizeof(image->planes[i]));
1585
1586 image->planes[i].surface.flags = flags;
1587 image->planes[i].surface.modifier = modifier;
1588 image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1589 image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1590 image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1591
1592 /* align byte per element on dword */
1593 if (image->planes[i].surface.bpe == 3) {
1594 image->planes[i].surface.bpe = 4;
1595 }
1596 }
1597 }
1598
/**
 * Compute the full layout of an image: per-plane surfaces, offsets, strides
 * and derived state (TC-compat CMASK, L2 coherency, comp-to-single support).
 *
 * \param create_info extra radv creation parameters; vk_info is ignored here.
 * \param mod_info    explicit DRM format modifier plane layouts, or NULL.
 * \return VK_SUCCESS, or an error when external metadata or the modifier
 *         plane layouts are invalid.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(device->physical_device, image);

   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk_format);
   for (unsigned plane = 0; plane < plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);

      /* Disable DCC/FMASK/HTILE on multi-plane images or when the caller
       * explicitly asked for no metadata planes. */
      if (create_info.no_metadata_planes || plane_count > 1) {
         image->planes[plane].surface.flags |=
            RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      /* Apply layout metadata imported from an external BO (not used with
       * explicit modifiers). */
      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
                                       &image->planes[plane].surface, image_info.storage_samples,
                                       image_info.levels, create_info.bo_metadata->size_metadata,
                                       create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 &&
          !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         /* The app-provided row pitch must be non-zero and a multiple of the
          * bytes-per-element of the surface. */
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                             &image->planes[plane].surface, image->info.levels,
                                             offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                            &image->planes[plane].surface, i,
                                            0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      /* Grow the overall image size/alignment to cover this plane. */
      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format =
         radv_image_get_plane_format(device->physical_device, image, plane);
   }

   image->tc_compatible_cmask =
      radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
1701
1702 static void
radv_destroy_image(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_image * image)1703 radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
1704 struct radv_image *image)
1705 {
1706 if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
1707 device->ws->buffer_destroy(device->ws, image->bo);
1708
1709 if (image->owned_memory != VK_NULL_HANDLE) {
1710 RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1711 radv_free_memory(device, pAllocator, mem);
1712 }
1713
1714 vk_object_base_finish(&image->base);
1715 vk_free2(&device->vk.alloc, pAllocator, image);
1716 }
1717
/* Dump image and per-plane surface information to stderr; used when the
 * RADV_DEBUG_IMG debug flag is set. */
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", "
           "offset=%" PRIu64 ", array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
           image->size, image->alignment, image->info.width, image->info.height, image->offset,
           image->info.array_size, image->info.levels);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      /* Offset of memory plane 0 of this format plane within the BO. */
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, 0);

      fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}
1740
1741 static uint64_t
radv_select_modifier(const struct radv_device * dev,VkFormat format,const struct VkImageDrmFormatModifierListCreateInfoEXT * mod_list)1742 radv_select_modifier(const struct radv_device *dev, VkFormat format,
1743 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1744 {
1745 const struct radv_physical_device *pdev = dev->physical_device;
1746 unsigned mod_count;
1747
1748 assert(mod_list->drmFormatModifierCount);
1749
1750 /* We can allow everything here as it does not affect order and the application
1751 * is only allowed to specify modifiers that we support. */
1752 const struct ac_modifier_options modifier_options = {
1753 .dcc = true,
1754 .dcc_retile = true,
1755 };
1756
1757 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1758 &mod_count, NULL);
1759
1760 uint64_t *mods = calloc(mod_count, sizeof(*mods));
1761
1762 /* If allocations fail, fall back to a dumber solution. */
1763 if (!mods)
1764 return mod_list->pDrmFormatModifiers[0];
1765
1766 ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
1767 &mod_count, mods);
1768
1769 for (unsigned i = 0; i < mod_count; ++i) {
1770 for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1771 if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1772 free(mods);
1773 return mod_list->pDrmFormatModifiers[j];
1774 }
1775 }
1776 }
1777 unreachable("App specified an invalid modifier");
1778 }
1779
/**
 * Create a radv_image from a VkImageCreateInfo plus radv-specific extras.
 *
 * Allocates the image object, records creation-time state, selects a DRM
 * format modifier if requested, and computes the surface layout unless it
 * must be delayed (Android hardware buffers). Sparse-binding images also get
 * a virtual BO.
 *
 * \return VK_SUCCESS with *pImage set, or an error code.
 */
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   /* Resolve Android external formats to a concrete VkFormat. */
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   /* Internal plane count can exceed the API one (e.g. ETC2 emulation). */
   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);

   /* The plane array is allocated inline after the image struct. */
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image =
      vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   /* Note: API plane count, not the internal count used for allocation. */
   image->plane_count = vk_format_get_plane_count(format);

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      /* External/foreign families grant access to all internal families. */
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << vk_queue_to_radv(device->physical_device,
                                                               pCreateInfo->pQueueFamilyIndices[i]);
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
       !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
       pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < plane_count; ++plane) {
      image->planes[plane].surface.flags =
         radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   /* For Android hardware buffers, layout computation is deferred until the
    * buffer is bound (see radv_image_create_layout callers). */
   bool delay_layout =
      external_info && (external_info->handleTypes &
                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      /* Sparse images are backed by a virtual BO the app binds memory into. */
      result =
         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
   }

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}
1899
/**
 * Build the texture (or storage image) descriptor for one plane of an image
 * view, including the mutable fields that depend on the base mip level.
 *
 * \param is_storage_image     fill iview->storage_descriptor instead of
 *                             iview->descriptor.
 * \param plane_id             image plane backing this descriptor.
 * \param descriptor_plane_id  slot inside the descriptor's plane array.
 */
static void
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
                                VkFormat vk_format, const VkComponentMapping *components,
                                float min_lod,
                                bool is_storage_image, bool disable_compression,
                                bool enable_compression, unsigned plane_id,
                                unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   /* Scale the surface block width from the plane format to the view format. */
   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
           vk_format_get_blockwidth(vk_format);

   /* On GFX9+ the descriptor encodes the absolute base mip; older chips keep
    * hw_level at 0 and handle the base level via the mutable fields below. */
   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(
      device, image, is_storage_image, iview->type, vk_format, components, hw_level,
      hw_level + iview->level_count - 1, iview->base_layer,
      iview->base_layer + iview->layer_count - 1,
      vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
      vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
      iview->extent.depth, min_lod, descriptor->plane_descriptors[descriptor_plane_id],
      descriptor_plane_id || is_storage_image ? NULL : descriptor->fmask_descriptor);

   const struct legacy_surf_level *base_level_info = NULL;
   if (device->physical_device->rad_info.chip_class <= GFX9) {
      if (is_stencil)
         base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
      else
         base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
   }

   /* Storage views only keep compression if DCC image stores are usable or
    * compression was explicitly requested. */
   bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
   if (is_storage_image && !(enable_write_compression || enable_compression))
      disable_compression = true;
   si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
                                  iview->base_mip, blk_w, is_stencil, is_storage_image,
                                  disable_compression, enable_write_compression,
                                  descriptor->plane_descriptors[descriptor_plane_id]);
}
1953
1954 static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)1955 radv_plane_from_aspect(VkImageAspectFlags mask)
1956 {
1957 switch (mask) {
1958 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1959 case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1960 return 1;
1961 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1962 case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1963 return 2;
1964 case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1965 return 3;
1966 default:
1967 return 0;
1968 }
1969 }
1970
1971 VkFormat
radv_get_aspect_format(struct radv_image * image,VkImageAspectFlags mask)1972 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1973 {
1974 switch (mask) {
1975 case VK_IMAGE_ASPECT_PLANE_0_BIT:
1976 return image->planes[0].format;
1977 case VK_IMAGE_ASPECT_PLANE_1_BIT:
1978 return image->planes[1].format;
1979 case VK_IMAGE_ASPECT_PLANE_2_BIT:
1980 return image->planes[2].format;
1981 case VK_IMAGE_ASPECT_STENCIL_BIT:
1982 return vk_format_stencil_only(image->vk_format);
1983 case VK_IMAGE_ASPECT_DEPTH_BIT:
1984 return vk_format_depth_only(image->vk_format);
1985 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1986 return vk_format_depth_only(image->vk_format);
1987 default:
1988 return image->vk_format;
1989 }
1990 }
1991
1992 /**
1993 * Determine if the given image view can be fast cleared.
1994 */
1995 static bool
radv_image_view_can_fast_clear(const struct radv_device * device,const struct radv_image_view * iview)1996 radv_image_view_can_fast_clear(const struct radv_device *device,
1997 const struct radv_image_view *iview)
1998 {
1999 struct radv_image *image;
2000
2001 if (!iview)
2002 return false;
2003 image = iview->image;
2004
2005 /* Only fast clear if the image itself can be fast cleared. */
2006 if (!radv_image_can_fast_clear(device, image))
2007 return false;
2008
2009 /* Only fast clear if all layers are bound. */
2010 if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
2011 return false;
2012
2013 /* Only fast clear if the view covers the whole image. */
2014 if (!radv_image_extent_compare(image, &iview->extent))
2015 return false;
2016
2017 return true;
2018 }
2019
/**
 * Initialize an image view: resolve the effective format/plane, compute the
 * descriptor extent (including block-compressed -> uncompressed view fixups),
 * and build the sampled + storage descriptors for each plane.
 */
void
radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const struct radv_image_view_extra_create_info *extra_create_info)
{
   RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
   uint32_t plane_count = 1;
   float min_lod = 0.0f;

   const struct VkImageViewMinLodCreateInfoEXT *min_lod_info =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT);

   if (min_lod_info)
      min_lod = min_lod_info->minLod;

   vk_object_base_init(&device->vk, &iview->base, VK_OBJECT_TYPE_IMAGE_VIEW);

   /* Sanity-check that the layer range fits in the image. */
   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             image->info.array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             radv_minify(image->info.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
   iview->image = image;
   iview->type = pCreateInfo->viewType;
   iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->base_layer = range->baseArrayLayer;
   iview->layer_count = radv_get_layerCount(image, range);
   iview->base_mip = range->baseMipLevel;
   iview->level_count = radv_get_levelCount(image, range);

   iview->vk_format = pCreateInfo->format;

   /* If the image has an Android external format, pCreateInfo->format will be
    * VK_FORMAT_UNDEFINED. */
   if (iview->vk_format == VK_FORMAT_UNDEFINED)
      iview->vk_format = image->vk_format;

   /* Split out the right aspect. Note that for internal meta code we sometimes
    * use an equivalent color format for the aspect so we first have to check
    * if we actually got depth/stencil formats. */
   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      if (vk_format_has_stencil(iview->vk_format))
         iview->vk_format = vk_format_stencil_only(iview->vk_format);
   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
      if (vk_format_has_depth(iview->vk_format))
         iview->vk_format = vk_format_depth_only(iview->vk_format);
   }

   /* Multi-planar view over a multi-planar image: one descriptor per plane. */
   if (vk_format_get_plane_count(image->vk_format) > 1 &&
       iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
      plane_count = vk_format_get_plane_count(iview->vk_format);
   }

   /* ETC2 emulation: redirect ETC views to the emulated plane 1 with the
    * replacement format. */
   if (device->physical_device->emulate_etc2 &&
       vk_format_description(image->vk_format)->layout == UTIL_FORMAT_LAYOUT_ETC) {
      const struct util_format_description *desc = vk_format_description(iview->vk_format);
      assert(desc);
      if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
         iview->plane_id = 1;
         iview->vk_format = etc2_emulation_format(iview->vk_format);
      }

      plane_count = 1;
   }

   /* GFX9+ descriptors always hold the base-level extent; older chips hold
    * the extent of the view's base mip. */
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      iview->extent = (VkExtent3D){
         .width = image->info.width,
         .height = image->info.height,
         .depth = image->info.depth,
      };
   } else {
      iview->extent = (VkExtent3D){
         .width = radv_minify(image->info.width, range->baseMipLevel),
         .height = radv_minify(image->info.height, range->baseMipLevel),
         .depth = radv_minify(image->info.depth, range->baseMipLevel),
      };
   }

   /* View format differs from the plane format (e.g. compressed image viewed
    * through an uncompressed block-compatible format): rescale the extent
    * from view blocks to image blocks. */
   if (iview->vk_format != image->planes[iview->plane_id].format) {
      unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
      unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
      unsigned img_bw = vk_format_get_blockwidth(image->planes[iview->plane_id].format);
      unsigned img_bh = vk_format_get_blockheight(image->planes[iview->plane_id].format);

      iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
      iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

      /* Comment ported from amdvlk -
       * If we have the following image:
       * Uncompressed pixels   Compressed block sizes (4x4)
       * mip0:       22 x 22                  6 x 6
       * mip1:       11 x 11                  3 x 3
       * mip2:        5 x  5                  2 x 2
       * mip3:        2 x  2                  1 x 1
       * mip4:        1 x  1                  1 x 1
       *
       * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
       * the HW is calculating the degradation of the block sizes down the mip-chain as follows
       * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
       *
       * This means that mip2 will be missing texels.
       *
       * Fix this by calculating the base mip's width and height, then convert
       * that, and round it back up to get the level 0 size. Clamp the
       * converted size between the original values, and the physical extent
       * of the base mipmap.
       *
       * On GFX10 we have to take care to not go over the physical extent
       * of the base mipmap as otherwise the GPU computes a different layout.
       * Note that the GPU does use the same base-mip dimensions for both a
       * block compatible format and the compressed format, so even if we take
       * the plain converted dimensions the physical layout is correct.
       */
      if (device->physical_device->rad_info.chip_class >= GFX9 &&
          vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
         /* If we have multiple levels in the view we should ideally take the last level,
          * but the mip calculation has a max(..., 1) so walking back to the base mip in an
          * useful way is hard. */
         if (iview->level_count > 1) {
            iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
            iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
         } else {
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            /* Scale the base-mip size back up to a level-0 size. */
            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width,
                                        iview->image->planes[0].surface.u.gfx9.base_mip_width);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height,
                                         iview->image->planes[0].surface.u.gfx9.base_mip_height);
         }
      }
   }

   iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);

   /* Build both the sampled and the storage descriptor for every plane. */
   bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
   bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, false,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, min_lod, true,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
   }
}
2184
/* Tear down an image view; only the base object state needs finishing here. */
void
radv_image_view_finish(struct radv_image_view *iview)
{
   vk_object_base_finish(&iview->base);
}
2190
/**
 * Return whether HTILE remains compressed for \p image while it is in
 * \p layout and accessed by the queues in \p queue_mask.
 */
bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
                                VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
   switch (layout) {
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR:
      /* Attachment layouts always keep HTILE when present. */
      return radv_image_has_htile(image);
   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      /* Transfer dst: TC-compat HTILE is always fine; plain HTILE only when
       * the image is used exclusively on the general (gfx) queue. */
      return radv_image_is_tc_compat_htile(image) ||
             (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
   case VK_IMAGE_LAYOUT_GENERAL:
      /* It should be safe to enable TC-compat HTILE with
       * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
       * if the image doesn't have the storage bit set. This
       * improves performance for apps that use GENERAL for the main
       * depth pass because this allows compression and this reduces
       * the number of decompressions from/to GENERAL.
       */
      /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
       * queue is likely broken for eg. depth/stencil copies.
       */
      if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
          !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
         return true;
      } else {
         return false;
      }
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
      if (radv_image_is_tc_compat_htile(image) ||
          (radv_image_has_htile(image) &&
           !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
         /* Keep HTILE compressed if the image is only going to
          * be used as a depth/stencil read-only attachment.
          */
         return true;
      } else {
         return false;
      }
      break;
   default:
      /* Any other layout only stays compressed for TC-compat HTILE images. */
      return radv_image_is_tc_compat_htile(image);
   }
}
2238
2239 bool
radv_layout_can_fast_clear(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,bool in_render_loop,unsigned queue_mask)2240 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
2241 unsigned level, VkImageLayout layout, bool in_render_loop,
2242 unsigned queue_mask)
2243 {
2244 if (radv_dcc_enabled(image, level) &&
2245 !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
2246 return false;
2247
2248 if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
2249 return false;
2250
2251 if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
2252 layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR)
2253 return false;
2254
2255 /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
2256 * images can only be fast-cleared if comp-to-single is supported because we don't yet support
2257 * FCE on the compute queue.
2258 */
2259 return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
2260 }
2261
2262 bool
radv_layout_dcc_compressed(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,bool in_render_loop,unsigned queue_mask)2263 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
2264 unsigned level, VkImageLayout layout, bool in_render_loop,
2265 unsigned queue_mask)
2266 {
2267 if (!radv_dcc_enabled(image, level))
2268 return false;
2269
2270 if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
2271 return true;
2272
2273 /* If the image is read-only, we can always just keep it compressed */
2274 if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
2275 return true;
2276
2277 /* Don't compress compute transfer dst when image stores are not supported. */
2278 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2279 (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
2280 return false;
2281
2282 return device->physical_device->rad_info.chip_class >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
2283 }
2284
2285 bool
radv_layout_fmask_compressed(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)2286 radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
2287 VkImageLayout layout, unsigned queue_mask)
2288 {
2289 if (!radv_image_has_fmask(image))
2290 return false;
2291
2292 /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
2293 * expanded before.
2294 */
2295 if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
2296 (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
2297 return false;
2298
2299 /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
2300 return layout != VK_IMAGE_LAYOUT_GENERAL &&
2301 (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
2302 }
2303
2304 unsigned
radv_image_queue_family_mask(const struct radv_image * image,enum radv_queue_family family,enum radv_queue_family queue_family)2305 radv_image_queue_family_mask(const struct radv_image *image,
2306 enum radv_queue_family family,
2307 enum radv_queue_family queue_family)
2308 {
2309 if (!image->exclusive)
2310 return image->queue_family_mask;
2311 if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
2312 return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
2313 if (family == VK_QUEUE_FAMILY_IGNORED)
2314 return 1u << queue_family;
2315 return 1u << family;
2316 }
2317
2318 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateImage(VkDevice device,const VkImageCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImage * pImage)2319 radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
2320 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
2321 {
2322 #ifdef ANDROID
2323 const VkNativeBufferANDROID *gralloc_info =
2324 vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
2325
2326 if (gralloc_info)
2327 return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
2328 #endif
2329
2330 const struct wsi_image_create_info *wsi_info =
2331 vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
2332 bool scanout = wsi_info && wsi_info->scanout;
2333 bool prime_blit_src = wsi_info && wsi_info->buffer_blit_src;
2334
2335 return radv_image_create(device,
2336 &(struct radv_image_create_info){
2337 .vk_info = pCreateInfo,
2338 .scanout = scanout,
2339 .prime_blit_src = prime_blit_src,
2340 },
2341 pAllocator, pImage);
2342 }
2343
2344 VKAPI_ATTR void VKAPI_CALL
radv_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)2345 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
2346 {
2347 RADV_FROM_HANDLE(radv_device, device, _device);
2348 RADV_FROM_HANDLE(radv_image, image, _image);
2349
2350 if (!image)
2351 return;
2352
2353 radv_destroy_image(device, pAllocator, image);
2354 }
2355
/* Implements vkGetImageSubresourceLayout: reports the memory layout (offset,
 * row/array/depth pitch, size) of one mip level / array layer of an image.
 * Three paths: DRM-modifier images (memory-plane query), GFX9+ addressing,
 * and the legacy (pre-GFX9) per-level surface description.
 */
VKAPI_ATTR void VKAPI_CALL
radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
                               const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->mipLevel;
   int layer = pSubresource->arrayLayer;

   /* Multi-planar formats (e.g. YCbCr): pick the format plane from the aspect. */
   unsigned plane_id = 0;
   if (vk_format_get_plane_count(image->vk_format) > 1)
      plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      /* For modifier images the aspect selects a MEMORY plane (which may be a
       * metadata plane such as DCC), all backed by plane 0's surface.
       */
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

      /* Modifier images are restricted to a single mip level and layer. */
      assert(level == 0);
      assert(layer == 0);

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    surface, mem_plane_id, 0);
      pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
                                                      surface, mem_plane_id, level);
      pLayout->arrayPitch = 0;
      pLayout->depthPitch = 0;
      pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* On GFX9+ only linear surfaces have per-level offsets; tiled levels are
       * addressed within the slice by the hardware.
       */
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, layer) +
                        level_offset;
      if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         /* Linear surfaces track a per-level pitch; tiled ones use the surface pitch. */
         uint32_t pitch =
            surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->rowPitch = pitch * surface->bpe;
      }

      pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->size = surface->u.gfx9.surf_slice_size;
      /* 3D images: total size spans all depth slices of this level. */
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   } else {
      /* Legacy (pre-GFX9) path: offsets are stored in 256-byte units and slice
       * sizes in dwords, hence the *256 and *4 scaling.
       */
      pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                        (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   }
}
2423
2424 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device,VkImage _image,VkImageDrmFormatModifierPropertiesEXT * pProperties)2425 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
2426 VkImageDrmFormatModifierPropertiesEXT *pProperties)
2427 {
2428 RADV_FROM_HANDLE(radv_image, image, _image);
2429
2430 pProperties->drmFormatModifier = image->planes[0].surface.modifier;
2431 return VK_SUCCESS;
2432 }
2433
2434 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateImageView(VkDevice _device,const VkImageViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImageView * pView)2435 radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
2436 const VkAllocationCallbacks *pAllocator, VkImageView *pView)
2437 {
2438 RADV_FROM_HANDLE(radv_device, device, _device);
2439 struct radv_image_view *view;
2440
2441 view =
2442 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2443 if (view == NULL)
2444 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2445
2446 radv_image_view_init(view, device, pCreateInfo, NULL);
2447
2448 *pView = radv_image_view_to_handle(view);
2449
2450 return VK_SUCCESS;
2451 }
2452
2453 VKAPI_ATTR void VKAPI_CALL
radv_DestroyImageView(VkDevice _device,VkImageView _iview,const VkAllocationCallbacks * pAllocator)2454 radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
2455 {
2456 RADV_FROM_HANDLE(radv_device, device, _device);
2457 RADV_FROM_HANDLE(radv_image_view, iview, _iview);
2458
2459 if (!iview)
2460 return;
2461
2462 radv_image_view_finish(iview);
2463 vk_free2(&device->vk.alloc, pAllocator, iview);
2464 }
2465
2466 void
radv_buffer_view_init(struct radv_buffer_view * view,struct radv_device * device,const VkBufferViewCreateInfo * pCreateInfo)2467 radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
2468 const VkBufferViewCreateInfo *pCreateInfo)
2469 {
2470 RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
2471
2472 vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
2473
2474 view->bo = buffer->bo;
2475 view->range =
2476 pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
2477 view->vk_format = pCreateInfo->format;
2478
2479 radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
2480 view->state);
2481 }
2482
/* Releases the base-object bookkeeping of a buffer view; the counterpart of
 * radv_buffer_view_init(). The view's memory itself is freed by the caller.
 */
void
radv_buffer_view_finish(struct radv_buffer_view *view)
{
   vk_object_base_finish(&view->base);
}
2488
2489 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateBufferView(VkDevice _device,const VkBufferViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBufferView * pView)2490 radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
2491 const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
2492 {
2493 RADV_FROM_HANDLE(radv_device, device, _device);
2494 struct radv_buffer_view *view;
2495
2496 view =
2497 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2498 if (!view)
2499 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2500
2501 radv_buffer_view_init(view, device, pCreateInfo);
2502
2503 *pView = radv_buffer_view_to_handle(view);
2504
2505 return VK_SUCCESS;
2506 }
2507
2508 VKAPI_ATTR void VKAPI_CALL
radv_DestroyBufferView(VkDevice _device,VkBufferView bufferView,const VkAllocationCallbacks * pAllocator)2509 radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
2510 const VkAllocationCallbacks *pAllocator)
2511 {
2512 RADV_FROM_HANDLE(radv_device, device, _device);
2513 RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
2514
2515 if (!view)
2516 return;
2517
2518 radv_buffer_view_finish(view);
2519 vk_free2(&device->vk.alloc, pAllocator, view);
2520 }
2521