1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25  * DEALINGS IN THE SOFTWARE.
26  */
27 
28 #include "tu_private.h"
29 #include "fdl/fd6_format_table.h"
30 
31 #include "util/debug.h"
32 #include "util/u_atomic.h"
33 #include "util/format/u_format.h"
34 #include "vk_format.h"
35 #include "vk_util.h"
36 #include "drm-uapi/drm_fourcc.h"
37 
38 #include "tu_cs.h"
39 
40 static uint32_t
tu6_plane_count(VkFormat format)41 tu6_plane_count(VkFormat format)
42 {
43    switch (format) {
44    default:
45       return 1;
46    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
47    case VK_FORMAT_D32_SFLOAT_S8_UINT:
48       return 2;
49    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
50       return 3;
51    }
52 }
53 
54 static VkFormat
tu6_plane_format(VkFormat format,uint32_t plane)55 tu6_plane_format(VkFormat format, uint32_t plane)
56 {
57    switch (format) {
58    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
59       /* note: with UBWC, and Y plane UBWC is different from R8_UNORM */
60       return plane ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
61    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
62       return VK_FORMAT_R8_UNORM;
63    case VK_FORMAT_D32_SFLOAT_S8_UINT:
64       return plane ? VK_FORMAT_S8_UINT : VK_FORMAT_D32_SFLOAT;
65    default:
66       return format;
67    }
68 }
69 
70 static uint32_t
tu6_plane_index(VkFormat format,VkImageAspectFlags aspect_mask)71 tu6_plane_index(VkFormat format, VkImageAspectFlags aspect_mask)
72 {
73    switch (aspect_mask) {
74    default:
75       return 0;
76    case VK_IMAGE_ASPECT_PLANE_1_BIT:
77       return 1;
78    case VK_IMAGE_ASPECT_PLANE_2_BIT:
79       return 2;
80    case VK_IMAGE_ASPECT_STENCIL_BIT:
81       return format == VK_FORMAT_D32_SFLOAT_S8_UINT;
82    }
83 }
84 
85 static void
compose_swizzle(unsigned char * swiz,const VkComponentMapping * mapping)86 compose_swizzle(unsigned char *swiz, const VkComponentMapping *mapping)
87 {
88    unsigned char src_swiz[4] = { swiz[0], swiz[1], swiz[2], swiz[3] };
89    VkComponentSwizzle vk_swiz[4] = {
90       mapping->r, mapping->g, mapping->b, mapping->a
91    };
92    for (int i = 0; i < 4; i++) {
93       switch (vk_swiz[i]) {
94       case VK_COMPONENT_SWIZZLE_IDENTITY:
95          swiz[i] = src_swiz[i];
96          break;
97       case VK_COMPONENT_SWIZZLE_R...VK_COMPONENT_SWIZZLE_A:
98          swiz[i] = src_swiz[vk_swiz[i] - VK_COMPONENT_SWIZZLE_R];
99          break;
100       case VK_COMPONENT_SWIZZLE_ZERO:
101          swiz[i] = A6XX_TEX_ZERO;
102          break;
103       case VK_COMPONENT_SWIZZLE_ONE:
104          swiz[i] = A6XX_TEX_ONE;
105          break;
106       default:
107          unreachable("unexpected swizzle");
108       }
109    }
110 }
111 
112 static uint32_t
tu6_texswiz(const VkComponentMapping * comps,const struct tu_sampler_ycbcr_conversion * conversion,VkFormat format,VkImageAspectFlagBits aspect_mask,bool has_z24uint_s8uint)113 tu6_texswiz(const VkComponentMapping *comps,
114             const struct tu_sampler_ycbcr_conversion *conversion,
115             VkFormat format,
116             VkImageAspectFlagBits aspect_mask,
117             bool has_z24uint_s8uint)
118 {
119    unsigned char swiz[4] = {
120       A6XX_TEX_X, A6XX_TEX_Y, A6XX_TEX_Z, A6XX_TEX_W,
121    };
122 
123    switch (format) {
124    case VK_FORMAT_G8B8G8R8_422_UNORM:
125    case VK_FORMAT_B8G8R8G8_422_UNORM:
126    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
127    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
128       swiz[0] = A6XX_TEX_Z;
129       swiz[1] = A6XX_TEX_X;
130       swiz[2] = A6XX_TEX_Y;
131       break;
132    case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
133    case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
134       /* same hardware format is used for BC1_RGB / BC1_RGBA */
135       swiz[3] = A6XX_TEX_ONE;
136       break;
137    case VK_FORMAT_D24_UNORM_S8_UINT:
138       if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
139          if (!has_z24uint_s8uint) {
140             /* using FMT6_8_8_8_8_UINT, so need to pick out the W channel and
141              * swizzle (0,0,1) in the rest (see "Conversion to RGBA").
142              */
143             swiz[0] = A6XX_TEX_W;
144             swiz[1] = A6XX_TEX_ZERO;
145             swiz[2] = A6XX_TEX_ZERO;
146             swiz[3] = A6XX_TEX_ONE;
147          } else {
148             /* using FMT6_Z24_UINT_S8_UINT, which is (d, s, 0, 1), so need to
149              * swizzle away the d.
150              */
151             swiz[0] = A6XX_TEX_Y;
152             swiz[1] = A6XX_TEX_ZERO;
153          }
154       }
155       break;
156    default:
157       break;
158    }
159 
160    compose_swizzle(swiz, comps);
161    if (conversion)
162       compose_swizzle(swiz, &conversion->components);
163 
164    return A6XX_TEX_CONST_0_SWIZ_X(swiz[0]) |
165           A6XX_TEX_CONST_0_SWIZ_Y(swiz[1]) |
166           A6XX_TEX_CONST_0_SWIZ_Z(swiz[2]) |
167           A6XX_TEX_CONST_0_SWIZ_W(swiz[3]);
168 }
169 
170 void
tu_cs_image_ref(struct tu_cs * cs,const struct tu_image_view * iview,uint32_t layer)171 tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
172 {
173    tu_cs_emit(cs, iview->PITCH);
174    tu_cs_emit(cs, iview->layer_size >> 6);
175    tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
176 }
177 
178 void
tu_cs_image_stencil_ref(struct tu_cs * cs,const struct tu_image_view * iview,uint32_t layer)179 tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
180 {
181    tu_cs_emit(cs, iview->stencil_PITCH);
182    tu_cs_emit(cs, iview->stencil_layer_size >> 6);
183    tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
184 }
185 
186 void
tu_cs_image_ref_2d(struct tu_cs * cs,const struct tu_image_view * iview,uint32_t layer,bool src)187 tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src)
188 {
189    tu_cs_emit_qw(cs, iview->base_addr + iview->layer_size * layer);
190    /* SP_PS_2D_SRC_PITCH has shifted pitch field */
191    tu_cs_emit(cs, iview->PITCH << (src ? 9 : 0));
192 }
193 
194 void
tu_cs_image_flag_ref(struct tu_cs * cs,const struct tu_image_view * iview,uint32_t layer)195 tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
196 {
197    tu_cs_emit_qw(cs, iview->ubwc_addr + iview->ubwc_layer_size * layer);
198    tu_cs_emit(cs, iview->FLAG_BUFFER_PITCH);
199 }
200 
201 void
tu_image_view_init(struct tu_image_view * iview,const VkImageViewCreateInfo * pCreateInfo,bool has_z24uint_s8uint)202 tu_image_view_init(struct tu_image_view *iview,
203                    const VkImageViewCreateInfo *pCreateInfo,
204                    bool has_z24uint_s8uint)
205 {
206    TU_FROM_HANDLE(tu_image, image, pCreateInfo->image);
207    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
208    VkFormat format = pCreateInfo->format;
209    VkImageAspectFlagBits aspect_mask = pCreateInfo->subresourceRange.aspectMask;
210 
211    const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
212       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
213    const struct tu_sampler_ycbcr_conversion *conversion = ycbcr_conversion ?
214       tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
215 
216    iview->image = image;
217 
218    memset(iview->descriptor, 0, sizeof(iview->descriptor));
219 
220    struct fdl_layout *layout =
221       &image->layout[tu6_plane_index(image->vk_format, aspect_mask)];
222 
223    uint32_t width = u_minify(layout->width0, range->baseMipLevel);
224    uint32_t height = u_minify(layout->height0, range->baseMipLevel);
225    uint32_t storage_depth = tu_get_layerCount(image, range);
226    if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) {
227       storage_depth = u_minify(image->layout[0].depth0, range->baseMipLevel);
228    }
229 
230    uint32_t depth = storage_depth;
231    if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE ||
232        pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
233       /* Cubes are treated as 2D arrays for storage images, so only divide the
234        * depth by 6 for the texture descriptor.
235        */
236       depth /= 6;
237    }
238 
239    uint64_t base_addr = image->bo->iova + image->bo_offset +
240       fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
241    uint64_t ubwc_addr = image->bo->iova + image->bo_offset +
242       fdl_ubwc_offset(layout, range->baseMipLevel, range->baseArrayLayer);
243 
244    uint32_t pitch = fdl_pitch(layout, range->baseMipLevel);
245    uint32_t ubwc_pitch = fdl_ubwc_pitch(layout, range->baseMipLevel);
246    uint32_t layer_size = fdl_layer_stride(layout, range->baseMipLevel);
247 
248    if (aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
249       format = tu6_plane_format(format, tu6_plane_index(format, aspect_mask));
250 
251    struct tu_native_format fmt = tu6_format_texture(format, layout->tile_mode);
252    /* note: freedreno layout assumes no TILE_ALL bit for non-UBWC color formats
253     * this means smaller mipmap levels have a linear tile mode.
254     * Depth/stencil formats have non-linear tile mode.
255     */
256    fmt.tile_mode = fdl_tile_mode(layout, range->baseMipLevel);
257 
258    bool ubwc_enabled = fdl_ubwc_enabled(layout, range->baseMipLevel);
259 
260    bool is_d24s8 = (format == VK_FORMAT_D24_UNORM_S8_UINT ||
261                     format == VK_FORMAT_X8_D24_UNORM_PACK32);
262 
263    if (is_d24s8 && ubwc_enabled)
264       fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
265 
266    unsigned fmt_tex = fmt.fmt;
267    if (is_d24s8) {
268       if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
269          fmt_tex = FMT6_Z24_UNORM_S8_UINT;
270       if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
271          fmt_tex = has_z24uint_s8uint ? FMT6_Z24_UINT_S8_UINT : FMT6_8_8_8_8_UINT;
272       /* TODO: also use this format with storage descriptor ? */
273    }
274 
275    iview->descriptor[0] =
276       A6XX_TEX_CONST_0_TILE_MODE(fmt.tile_mode) |
277       COND(vk_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
278       A6XX_TEX_CONST_0_FMT(fmt_tex) |
279       A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(layout->nr_samples)) |
280       A6XX_TEX_CONST_0_SWAP(fmt.swap) |
281       tu6_texswiz(&pCreateInfo->components, conversion, format, aspect_mask, has_z24uint_s8uint) |
282       A6XX_TEX_CONST_0_MIPLVLS(tu_get_levelCount(image, range) - 1);
283    iview->descriptor[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
284    iview->descriptor[2] =
285       A6XX_TEX_CONST_2_PITCHALIGN(layout->pitchalign - 6) |
286       A6XX_TEX_CONST_2_PITCH(pitch) |
287       A6XX_TEX_CONST_2_TYPE(tu6_tex_type(pCreateInfo->viewType, false));
288    iview->descriptor[3] = A6XX_TEX_CONST_3_ARRAY_PITCH(layer_size);
289    iview->descriptor[4] = base_addr;
290    iview->descriptor[5] = (base_addr >> 32) | A6XX_TEX_CONST_5_DEPTH(depth);
291 
292    if (layout->tile_all)
293       iview->descriptor[3] |= A6XX_TEX_CONST_3_TILE_ALL;
294 
295    if (format == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM ||
296        format == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM) {
297       /* chroma offset re-uses MIPLVLS bits */
298       assert(tu_get_levelCount(image, range) == 1);
299       if (conversion) {
300          if (conversion->chroma_offsets[0] == VK_CHROMA_LOCATION_MIDPOINT)
301             iview->descriptor[0] |= A6XX_TEX_CONST_0_CHROMA_MIDPOINT_X;
302          if (conversion->chroma_offsets[1] == VK_CHROMA_LOCATION_MIDPOINT)
303             iview->descriptor[0] |= A6XX_TEX_CONST_0_CHROMA_MIDPOINT_Y;
304       }
305 
306       uint64_t base_addr[3];
307 
308       iview->descriptor[3] |= A6XX_TEX_CONST_3_TILE_ALL;
309       if (ubwc_enabled) {
310          iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG;
311          /* no separate ubwc base, image must have the expected layout */
312          for (uint32_t i = 0; i < 3; i++) {
313             base_addr[i] = image->bo->iova + image->bo_offset +
314                fdl_ubwc_offset(&image->layout[i], range->baseMipLevel, range->baseArrayLayer);
315          }
316       } else {
317          for (uint32_t i = 0; i < 3; i++) {
318             base_addr[i] = image->bo->iova + image->bo_offset +
319                fdl_surface_offset(&image->layout[i], range->baseMipLevel, range->baseArrayLayer);
320          }
321       }
322 
323       iview->descriptor[4] = base_addr[0];
324       iview->descriptor[5] |= base_addr[0] >> 32;
325       iview->descriptor[6] =
326          A6XX_TEX_CONST_6_PLANE_PITCH(fdl_pitch(&image->layout[1], range->baseMipLevel));
327       iview->descriptor[7] = base_addr[1];
328       iview->descriptor[8] = base_addr[1] >> 32;
329       iview->descriptor[9] = base_addr[2];
330       iview->descriptor[10] = base_addr[2] >> 32;
331 
332       assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_3D);
333       return;
334    }
335 
336    if (ubwc_enabled) {
337       uint32_t block_width, block_height;
338       fdl6_get_ubwc_blockwidth(layout, &block_width, &block_height);
339 
340       iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG;
341       iview->descriptor[7] = ubwc_addr;
342       iview->descriptor[8] = ubwc_addr >> 32;
343       iview->descriptor[9] |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(layout->ubwc_layer_size >> 2);
344       iview->descriptor[10] |=
345          A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_pitch) |
346          A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(width, block_width))) |
347          A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(height, block_height)));
348    }
349 
350    if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D) {
351       iview->descriptor[3] |=
352          A6XX_TEX_CONST_3_MIN_LAYERSZ(layout->slices[image->level_count - 1].size0);
353    }
354 
355    iview->SP_PS_2D_SRC_INFO = A6XX_SP_PS_2D_SRC_INFO(
356       .color_format = fmt.fmt,
357       .tile_mode = fmt.tile_mode,
358       .color_swap = fmt.swap,
359       .flags = ubwc_enabled,
360       .srgb = vk_format_is_srgb(format),
361       .samples = tu_msaa_samples(layout->nr_samples),
362       .samples_average = layout->nr_samples > 1 &&
363                            !vk_format_is_int(format) &&
364                            !vk_format_is_depth_or_stencil(format),
365       .unk20 = 1,
366       .unk22 = 1).value;
367    iview->SP_PS_2D_SRC_SIZE =
368       A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height).value;
369 
370    /* note: these have same encoding for MRT and 2D (except 2D PITCH src) */
371    iview->PITCH = A6XX_RB_DEPTH_BUFFER_PITCH(pitch).value;
372    iview->FLAG_BUFFER_PITCH = A6XX_RB_DEPTH_FLAG_BUFFER_PITCH(
373       .pitch = ubwc_pitch, .array_pitch = layout->ubwc_layer_size >> 2).value;
374 
375    iview->base_addr = base_addr;
376    iview->ubwc_addr = ubwc_addr;
377    iview->layer_size = layer_size;
378    iview->ubwc_layer_size = layout->ubwc_layer_size;
379 
380    /* Don't set fields that are only used for attachments/blit dest if COLOR
381     * is unsupported.
382     */
383    if (!tu6_format_color_supported(format))
384       return;
385 
386    struct tu_native_format cfmt = tu6_format_color(format, layout->tile_mode);
387    cfmt.tile_mode = fmt.tile_mode;
388 
389    if (is_d24s8 && ubwc_enabled)
390       cfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
391 
392    memset(iview->storage_descriptor, 0, sizeof(iview->storage_descriptor));
393 
394    iview->storage_descriptor[0] =
395       A6XX_IBO_0_FMT(fmt.fmt) |
396       A6XX_IBO_0_TILE_MODE(fmt.tile_mode);
397    iview->storage_descriptor[1] =
398       A6XX_IBO_1_WIDTH(width) |
399       A6XX_IBO_1_HEIGHT(height);
400    iview->storage_descriptor[2] =
401       A6XX_IBO_2_PITCH(pitch) |
402       A6XX_IBO_2_TYPE(tu6_tex_type(pCreateInfo->viewType, true));
403    iview->storage_descriptor[3] = A6XX_IBO_3_ARRAY_PITCH(layer_size);
404 
405    iview->storage_descriptor[4] = base_addr;
406    iview->storage_descriptor[5] = (base_addr >> 32) | A6XX_IBO_5_DEPTH(storage_depth);
407 
408    if (ubwc_enabled) {
409       iview->storage_descriptor[3] |= A6XX_IBO_3_FLAG | A6XX_IBO_3_UNK27;
410       iview->storage_descriptor[7] |= ubwc_addr;
411       iview->storage_descriptor[8] |= ubwc_addr >> 32;
412       iview->storage_descriptor[9] = A6XX_IBO_9_FLAG_BUFFER_ARRAY_PITCH(layout->ubwc_layer_size >> 2);
413       iview->storage_descriptor[10] =
414          A6XX_IBO_10_FLAG_BUFFER_PITCH(ubwc_pitch);
415    }
416 
417    iview->extent.width = width;
418    iview->extent.height = height;
419    iview->need_y2_align =
420       (fmt.tile_mode == TILE6_LINEAR && range->baseMipLevel != image->level_count - 1);
421 
422    iview->ubwc_enabled = ubwc_enabled;
423 
424    iview->RB_MRT_BUF_INFO = A6XX_RB_MRT_BUF_INFO(0,
425                               .color_tile_mode = cfmt.tile_mode,
426                               .color_format = cfmt.fmt,
427                               .color_swap = cfmt.swap).value;
428 
429    iview->SP_FS_MRT_REG = A6XX_SP_FS_MRT_REG(0,
430                               .color_format = cfmt.fmt,
431                               .color_sint = vk_format_is_sint(format),
432                               .color_uint = vk_format_is_uint(format)).value;
433 
434    iview->RB_2D_DST_INFO = A6XX_RB_2D_DST_INFO(
435       .color_format = cfmt.fmt,
436       .tile_mode = cfmt.tile_mode,
437       .color_swap = cfmt.swap,
438       .flags = ubwc_enabled,
439       .srgb = vk_format_is_srgb(format)).value;
440 
441    iview->RB_BLIT_DST_INFO = A6XX_RB_BLIT_DST_INFO(
442       .tile_mode = cfmt.tile_mode,
443       .samples = tu_msaa_samples(layout->nr_samples),
444       .color_format = cfmt.fmt,
445       .color_swap = cfmt.swap,
446       .flags = ubwc_enabled).value;
447 
448    if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
449       layout = &image->layout[1];
450       iview->stencil_base_addr = image->bo->iova + image->bo_offset +
451          fdl_surface_offset(layout, range->baseMipLevel, range->baseArrayLayer);
452       iview->stencil_layer_size = fdl_layer_stride(layout, range->baseMipLevel);
453       iview->stencil_PITCH = A6XX_RB_STENCIL_BUFFER_PITCH(fdl_pitch(layout, range->baseMipLevel)).value;
454    }
455 }
456 
457 bool
ubwc_possible(VkFormat format,VkImageType type,VkImageUsageFlags usage,VkImageUsageFlags stencil_usage,const struct fd_dev_info * info,VkSampleCountFlagBits samples)458 ubwc_possible(VkFormat format, VkImageType type, VkImageUsageFlags usage,
459               VkImageUsageFlags stencil_usage, const struct fd_dev_info *info,
460               VkSampleCountFlagBits samples)
461 {
462    /* no UBWC with compressed formats, E5B9G9R9, S8_UINT
463     * (S8_UINT because separate stencil doesn't have UBWC-enable bit)
464     */
465    if (vk_format_is_compressed(format) ||
466        format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 ||
467        format == VK_FORMAT_S8_UINT)
468       return false;
469 
470    if (!info->a6xx.has_8bpp_ubwc &&
471        (format == VK_FORMAT_R8_UNORM ||
472         format == VK_FORMAT_R8_SNORM ||
473         format == VK_FORMAT_R8_UINT ||
474         format == VK_FORMAT_R8_SINT ||
475         format == VK_FORMAT_R8_SRGB))
476       return false;
477 
478    if (type == VK_IMAGE_TYPE_3D) {
479       tu_finishme("UBWC with 3D textures");
480       return false;
481    }
482 
483    /* Disable UBWC for storage images.
484     *
485     * The closed GL driver skips UBWC for storage images (and additionally
486     * uses linear for writeonly images).  We seem to have image tiling working
487     * in freedreno in general, so turnip matches that.  freedreno also enables
488     * UBWC on images, but it's not really tested due to the lack of
489     * UBWC-enabled mipmaps in freedreno currently.  Just match the closed GL
490     * behavior of no UBWC.
491    */
492    if ((usage | stencil_usage) & VK_IMAGE_USAGE_STORAGE_BIT)
493       return false;
494 
495    /* Disable UBWC for D24S8 on A630 in some cases
496     *
497     * VK_IMAGE_ASPECT_STENCIL_BIT image view requires to be able to sample
498     * from the stencil component as UINT, however no format allows this
499     * on a630 (the special FMT6_Z24_UINT_S8_UINT format is missing)
500     *
501     * It must be sampled as FMT6_8_8_8_8_UINT, which is not UBWC-compatible
502     *
503     * Additionally, the special AS_R8G8B8A8 format is broken without UBWC,
504     * so we have to fallback to 8_8_8_8_UNORM when UBWC is disabled
505     */
506    if (!info->a6xx.has_z24uint_s8uint &&
507        format == VK_FORMAT_D24_UNORM_S8_UINT &&
508        (stencil_usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))
509       return false;
510 
511    if (!info->a6xx.has_z24uint_s8uint && samples > VK_SAMPLE_COUNT_1_BIT)
512       return false;
513 
514    return true;
515 }
516 
517 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateImage(VkDevice _device,const VkImageCreateInfo * pCreateInfo,const VkAllocationCallbacks * alloc,VkImage * pImage)518 tu_CreateImage(VkDevice _device,
519                const VkImageCreateInfo *pCreateInfo,
520                const VkAllocationCallbacks *alloc,
521                VkImage *pImage)
522 {
523    TU_FROM_HANDLE(tu_device, device, _device);
524    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
525    const VkSubresourceLayout *plane_layouts = NULL;
526    struct tu_image *image;
527 
528    if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
529       const VkImageDrmFormatModifierListCreateInfoEXT *mod_info =
530          vk_find_struct_const(pCreateInfo->pNext,
531                               IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
532       const VkImageDrmFormatModifierExplicitCreateInfoEXT *drm_explicit_info =
533          vk_find_struct_const(pCreateInfo->pNext,
534                               IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
535 
536       assert(mod_info || drm_explicit_info);
537 
538       if (mod_info) {
539          modifier = DRM_FORMAT_MOD_LINEAR;
540          for (unsigned i = 0; i < mod_info->drmFormatModifierCount; i++) {
541             if (mod_info->pDrmFormatModifiers[i] == DRM_FORMAT_MOD_QCOM_COMPRESSED)
542                modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
543          }
544       } else {
545          modifier = drm_explicit_info->drmFormatModifier;
546          assert(modifier == DRM_FORMAT_MOD_LINEAR ||
547                 modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED);
548          plane_layouts = drm_explicit_info->pPlaneLayouts;
549       }
550    } else {
551       const struct wsi_image_create_info *wsi_info =
552          vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
553       if (wsi_info && wsi_info->scanout)
554          modifier = DRM_FORMAT_MOD_LINEAR;
555    }
556 
557 #ifdef ANDROID
558    const VkNativeBufferANDROID *gralloc_info =
559       vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
560    int dma_buf;
561    if (gralloc_info) {
562       VkResult result = tu_gralloc_info(device, gralloc_info, &dma_buf, &modifier);
563       if (result != VK_SUCCESS)
564          return result;
565    }
566 #endif
567 
568    image = vk_object_zalloc(&device->vk, alloc, sizeof(*image),
569                             VK_OBJECT_TYPE_IMAGE);
570    if (!image)
571       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
572 
573    const VkExternalMemoryImageCreateInfo *external_info =
574       vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
575    image->shareable = external_info != NULL;
576 
577    image->vk_format = pCreateInfo->format;
578    image->level_count = pCreateInfo->mipLevels;
579    image->layer_count = pCreateInfo->arrayLayers;
580 
581    enum a6xx_tile_mode tile_mode = TILE6_3;
582    bool ubwc_enabled =
583       !(device->physical_device->instance->debug_flags & TU_DEBUG_NOUBWC);
584 
585    /* use linear tiling if requested */
586    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR || modifier == DRM_FORMAT_MOD_LINEAR) {
587       tile_mode = TILE6_LINEAR;
588       ubwc_enabled = false;
589    }
590 
591    /* Mutable images can be reinterpreted as any other compatible format.
592     * This is a problem with UBWC (compression for different formats is different),
593     * but also tiling ("swap" affects how tiled formats are stored in memory)
594     * Depth and stencil formats cannot be reintepreted as another format, and
595     * cannot be linear with sysmem rendering, so don't fall back for those.
596     *
597     * TODO:
598     * - if the fmt_list contains only formats which are swapped, but compatible
599     *   with each other (B8G8R8A8_UNORM and B8G8R8A8_UINT for example), then
600     *   tiling is still possible
601     * - figure out which UBWC compressions are compatible to keep it enabled
602     */
603    if ((pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
604        !vk_format_is_depth_or_stencil(image->vk_format)) {
605       const VkImageFormatListCreateInfo *fmt_list =
606          vk_find_struct_const(pCreateInfo->pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
607       bool may_be_swapped = true;
608       if (fmt_list) {
609          may_be_swapped = false;
610          for (uint32_t i = 0; i < fmt_list->viewFormatCount; i++) {
611             if (tu6_format_texture(fmt_list->pViewFormats[i], TILE6_LINEAR).swap) {
612                may_be_swapped = true;
613                break;
614             }
615          }
616       }
617       if (may_be_swapped)
618          tile_mode = TILE6_LINEAR;
619       ubwc_enabled = false;
620    }
621 
622    const VkImageStencilUsageCreateInfo *stencil_usage_info =
623       vk_find_struct_const(pCreateInfo->pNext, IMAGE_STENCIL_USAGE_CREATE_INFO);
624 
625    if (!ubwc_possible(image->vk_format, pCreateInfo->imageType, pCreateInfo->usage,
626                       stencil_usage_info ? stencil_usage_info->stencilUsage : pCreateInfo->usage,
627                       device->physical_device->info, pCreateInfo->samples))
628       ubwc_enabled = false;
629 
630    /* expect UBWC enabled if we asked for it */
631    assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
632 
633    for (uint32_t i = 0; i < tu6_plane_count(image->vk_format); i++) {
634       struct fdl_layout *layout = &image->layout[i];
635       VkFormat format = tu6_plane_format(image->vk_format, i);
636       uint32_t width0 = pCreateInfo->extent.width;
637       uint32_t height0 = pCreateInfo->extent.height;
638 
639       if (i > 0) {
640          switch (image->vk_format) {
641          case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
642          case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
643             /* half width/height on chroma planes */
644             width0 = (width0 + 1) >> 1;
645             height0 = (height0 + 1) >> 1;
646             break;
647          case VK_FORMAT_D32_SFLOAT_S8_UINT:
648             /* no UBWC for separate stencil */
649             ubwc_enabled = false;
650             break;
651          default:
652             break;
653          }
654       }
655 
656       struct fdl_explicit_layout plane_layout;
657 
658       if (plane_layouts) {
659          /* only expect simple 2D images for now */
660          if (pCreateInfo->mipLevels != 1 ||
661             pCreateInfo->arrayLayers != 1 ||
662             pCreateInfo->extent.depth != 1)
663             goto invalid_layout;
664 
665          plane_layout.offset = plane_layouts[i].offset;
666          plane_layout.pitch = plane_layouts[i].rowPitch;
667          /* note: use plane_layouts[0].arrayPitch to support array formats */
668       }
669 
670       layout->tile_mode = tile_mode;
671       layout->ubwc = ubwc_enabled;
672 
673       if (!fdl6_layout(layout, vk_format_to_pipe_format(format),
674                        pCreateInfo->samples,
675                        width0, height0,
676                        pCreateInfo->extent.depth,
677                        pCreateInfo->mipLevels,
678                        pCreateInfo->arrayLayers,
679                        pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
680                        plane_layouts ? &plane_layout : NULL)) {
681          assert(plane_layouts); /* can only fail with explicit layout */
682          goto invalid_layout;
683       }
684 
685       /* fdl6_layout can't take explicit offset without explicit pitch
686        * add offset manually for extra layouts for planes
687        */
688       if (!plane_layouts && i > 0) {
689          uint32_t offset = ALIGN_POT(image->total_size, 4096);
690          for (int i = 0; i < pCreateInfo->mipLevels; i++) {
691             layout->slices[i].offset += offset;
692             layout->ubwc_slices[i].offset += offset;
693          }
694          layout->size += offset;
695       }
696 
697       image->total_size = MAX2(image->total_size, layout->size);
698    }
699 
700    const struct util_format_description *desc = util_format_description(image->layout[0].format);
701    if (util_format_has_depth(desc) && !(device->instance->debug_flags & TU_DEBUG_NOLRZ))
702    {
703       /* Depth plane is the first one */
704       struct fdl_layout *layout = &image->layout[0];
705       unsigned width = layout->width0;
706       unsigned height = layout->height0;
707 
708       /* LRZ buffer is super-sampled */
709       switch (layout->nr_samples) {
710       case 4:
711          width *= 2;
712          FALLTHROUGH;
713       case 2:
714          height *= 2;
715          break;
716       default:
717          break;
718       }
719 
720       unsigned lrz_pitch  = align(DIV_ROUND_UP(width, 8), 32);
721       unsigned lrz_height = align(DIV_ROUND_UP(height, 8), 16);
722 
723       image->lrz_height = lrz_height;
724       image->lrz_pitch = lrz_pitch;
725       image->lrz_offset = image->total_size;
726       unsigned lrz_size = lrz_pitch * lrz_height * 2;
727       image->total_size += lrz_size;
728    }
729 
730    *pImage = tu_image_to_handle(image);
731 
732 #ifdef ANDROID
733    if (gralloc_info)
734       return tu_import_memory_from_gralloc_handle(_device, dma_buf, alloc, *pImage);
735 #endif
736    return VK_SUCCESS;
737 
738 invalid_layout:
739    vk_object_free(&device->vk, alloc, image);
740    return vk_error(device, VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
741 }
742 
743 VKAPI_ATTR void VKAPI_CALL
tu_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)744 tu_DestroyImage(VkDevice _device,
745                 VkImage _image,
746                 const VkAllocationCallbacks *pAllocator)
747 {
748    TU_FROM_HANDLE(tu_device, device, _device);
749    TU_FROM_HANDLE(tu_image, image, _image);
750 
751    if (!image)
752       return;
753 
754 #ifdef ANDROID
755    if (image->owned_memory != VK_NULL_HANDLE)
756       tu_FreeMemory(_device, image->owned_memory, pAllocator);
757 #endif
758 
759    vk_object_free(&device->vk, pAllocator, image);
760 }
761 
762 VKAPI_ATTR void VKAPI_CALL
tu_GetImageSubresourceLayout(VkDevice _device,VkImage _image,const VkImageSubresource * pSubresource,VkSubresourceLayout * pLayout)763 tu_GetImageSubresourceLayout(VkDevice _device,
764                              VkImage _image,
765                              const VkImageSubresource *pSubresource,
766                              VkSubresourceLayout *pLayout)
767 {
768    TU_FROM_HANDLE(tu_image, image, _image);
769 
770    struct fdl_layout *layout =
771       &image->layout[tu6_plane_index(image->vk_format, pSubresource->aspectMask)];
772    const struct fdl_slice *slice = layout->slices + pSubresource->mipLevel;
773 
774    pLayout->offset =
775       fdl_surface_offset(layout, pSubresource->mipLevel, pSubresource->arrayLayer);
776    pLayout->rowPitch = fdl_pitch(layout, pSubresource->mipLevel);
777    pLayout->arrayPitch = fdl_layer_stride(layout, pSubresource->mipLevel);
778    pLayout->depthPitch = slice->size0;
779    pLayout->size = pLayout->depthPitch * layout->depth0;
780 
781    if (fdl_ubwc_enabled(layout, pSubresource->mipLevel)) {
782       /* UBWC starts at offset 0 */
783       pLayout->offset = 0;
784       /* UBWC scanout won't match what the kernel wants if we have levels/layers */
785       assert(image->level_count == 1 && image->layer_count == 1);
786    }
787 }
788 
789 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetImageDrmFormatModifierPropertiesEXT(VkDevice device,VkImage _image,VkImageDrmFormatModifierPropertiesEXT * pProperties)790 tu_GetImageDrmFormatModifierPropertiesEXT(
791     VkDevice                                    device,
792     VkImage                                     _image,
793     VkImageDrmFormatModifierPropertiesEXT*      pProperties)
794 {
795    TU_FROM_HANDLE(tu_image, image, _image);
796 
797    /* TODO invent a modifier for tiled but not UBWC buffers */
798 
799    if (!image->layout[0].tile_mode)
800       pProperties->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
801    else if (image->layout[0].ubwc_layer_size)
802       pProperties->drmFormatModifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
803    else
804       pProperties->drmFormatModifier = DRM_FORMAT_MOD_INVALID;
805 
806    return VK_SUCCESS;
807 }
808 
809 
810 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateImageView(VkDevice _device,const VkImageViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkImageView * pView)811 tu_CreateImageView(VkDevice _device,
812                    const VkImageViewCreateInfo *pCreateInfo,
813                    const VkAllocationCallbacks *pAllocator,
814                    VkImageView *pView)
815 {
816    TU_FROM_HANDLE(tu_device, device, _device);
817    struct tu_image_view *view;
818 
819    view = vk_object_alloc(&device->vk, pAllocator, sizeof(*view),
820                           VK_OBJECT_TYPE_IMAGE_VIEW);
821    if (view == NULL)
822       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
823 
824    tu_image_view_init(view, pCreateInfo, device->physical_device->info->a6xx.has_z24uint_s8uint);
825 
826    *pView = tu_image_view_to_handle(view);
827 
828    return VK_SUCCESS;
829 }
830 
831 VKAPI_ATTR void VKAPI_CALL
tu_DestroyImageView(VkDevice _device,VkImageView _iview,const VkAllocationCallbacks * pAllocator)832 tu_DestroyImageView(VkDevice _device,
833                     VkImageView _iview,
834                     const VkAllocationCallbacks *pAllocator)
835 {
836    TU_FROM_HANDLE(tu_device, device, _device);
837    TU_FROM_HANDLE(tu_image_view, iview, _iview);
838 
839    if (!iview)
840       return;
841 
842    vk_object_free(&device->vk, pAllocator, iview);
843 }
844 
845 void
tu_buffer_view_init(struct tu_buffer_view * view,struct tu_device * device,const VkBufferViewCreateInfo * pCreateInfo)846 tu_buffer_view_init(struct tu_buffer_view *view,
847                     struct tu_device *device,
848                     const VkBufferViewCreateInfo *pCreateInfo)
849 {
850    TU_FROM_HANDLE(tu_buffer, buffer, pCreateInfo->buffer);
851 
852    view->buffer = buffer;
853 
854    enum VkFormat vfmt = pCreateInfo->format;
855    enum pipe_format pfmt = vk_format_to_pipe_format(vfmt);
856    const struct tu_native_format fmt = tu6_format_texture(vfmt, TILE6_LINEAR);
857 
858    uint32_t range;
859    if (pCreateInfo->range == VK_WHOLE_SIZE)
860       range = buffer->size - pCreateInfo->offset;
861    else
862       range = pCreateInfo->range;
863    uint32_t elements = range / util_format_get_blocksize(pfmt);
864 
865    static const VkComponentMapping components = {
866       .r = VK_COMPONENT_SWIZZLE_R,
867       .g = VK_COMPONENT_SWIZZLE_G,
868       .b = VK_COMPONENT_SWIZZLE_B,
869       .a = VK_COMPONENT_SWIZZLE_A,
870    };
871 
872    uint64_t iova = tu_buffer_iova(buffer) + pCreateInfo->offset;
873 
874    memset(&view->descriptor, 0, sizeof(view->descriptor));
875 
876    view->descriptor[0] =
877       A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) |
878       A6XX_TEX_CONST_0_SWAP(fmt.swap) |
879       A6XX_TEX_CONST_0_FMT(fmt.fmt) |
880       A6XX_TEX_CONST_0_MIPLVLS(0) |
881       tu6_texswiz(&components, NULL, vfmt, VK_IMAGE_ASPECT_COLOR_BIT, false);
882       COND(vk_format_is_srgb(vfmt), A6XX_TEX_CONST_0_SRGB);
883    view->descriptor[1] =
884       A6XX_TEX_CONST_1_WIDTH(elements & MASK(15)) |
885       A6XX_TEX_CONST_1_HEIGHT(elements >> 15);
886    view->descriptor[2] =
887       A6XX_TEX_CONST_2_UNK4 |
888       A6XX_TEX_CONST_2_UNK31;
889    view->descriptor[4] = iova;
890    view->descriptor[5] = iova >> 32;
891 }
892 
893 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBufferView(VkDevice _device,const VkBufferViewCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkBufferView * pView)894 tu_CreateBufferView(VkDevice _device,
895                     const VkBufferViewCreateInfo *pCreateInfo,
896                     const VkAllocationCallbacks *pAllocator,
897                     VkBufferView *pView)
898 {
899    TU_FROM_HANDLE(tu_device, device, _device);
900    struct tu_buffer_view *view;
901 
902    view = vk_object_alloc(&device->vk, pAllocator, sizeof(*view),
903                           VK_OBJECT_TYPE_BUFFER_VIEW);
904    if (!view)
905       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
906 
907    tu_buffer_view_init(view, device, pCreateInfo);
908 
909    *pView = tu_buffer_view_to_handle(view);
910 
911    return VK_SUCCESS;
912 }
913 
914 VKAPI_ATTR void VKAPI_CALL
tu_DestroyBufferView(VkDevice _device,VkBufferView bufferView,const VkAllocationCallbacks * pAllocator)915 tu_DestroyBufferView(VkDevice _device,
916                      VkBufferView bufferView,
917                      const VkAllocationCallbacks *pAllocator)
918 {
919    TU_FROM_HANDLE(tu_device, device, _device);
920    TU_FROM_HANDLE(tu_buffer_view, view, bufferView);
921 
922    if (!view)
923       return;
924 
925    vk_object_free(&device->vk, pAllocator, view);
926 }
927