1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 
26 static uint32_t
num_subpass_attachments(const VkSubpassDescription * desc)27 num_subpass_attachments(const VkSubpassDescription *desc)
28 {
29    return desc->inputAttachmentCount +
30           desc->colorAttachmentCount +
31           (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
32           (desc->pDepthStencilAttachment != NULL);
33 }
34 
35 static void
set_use_tlb_resolve(struct v3dv_device * device,struct v3dv_render_pass_attachment * att)36 set_use_tlb_resolve(struct v3dv_device *device,
37                     struct v3dv_render_pass_attachment *att)
38 {
39    const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
40    att->use_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
41 }
42 
43 static void
pass_find_subpass_range_for_attachments(struct v3dv_device * device,struct v3dv_render_pass * pass)44 pass_find_subpass_range_for_attachments(struct v3dv_device *device,
45                                         struct v3dv_render_pass *pass)
46 {
47    for (uint32_t i = 0; i < pass->attachment_count; i++) {
48       pass->attachments[i].first_subpass = pass->subpass_count - 1;
49       pass->attachments[i].last_subpass = 0;
50       if (pass->multiview_enabled) {
51          for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
52             pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
53             pass->attachments[i].views[j].last_subpass = 0;
54          }
55       }
56    }
57 
58    for (uint32_t i = 0; i < pass->subpass_count; i++) {
59       const struct v3dv_subpass *subpass = &pass->subpasses[i];
60 
61       for (uint32_t j = 0; j < subpass->color_count; j++) {
62          uint32_t attachment_idx = subpass->color_attachments[j].attachment;
63          if (attachment_idx == VK_ATTACHMENT_UNUSED)
64             continue;
65 
66          struct v3dv_render_pass_attachment *att =
67             &pass->attachments[attachment_idx];
68 
69          if (i < att->first_subpass)
70             att->first_subpass = i;
71          if (i > att->last_subpass)
72             att->last_subpass = i;
73 
74          uint32_t view_mask = subpass->view_mask;
75          while (view_mask) {
76             uint32_t view_index = u_bit_scan(&view_mask);
77             if (i < att->views[view_index].first_subpass)
78                att->views[view_index].first_subpass = i;
79             if (i > att->views[view_index].last_subpass)
80                att->views[view_index].last_subpass = i;
81          }
82 
83          if (subpass->resolve_attachments &&
84              subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
85             set_use_tlb_resolve(device, att);
86          }
87       }
88 
89       uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
90       if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
91          if (i < pass->attachments[ds_attachment_idx].first_subpass)
92             pass->attachments[ds_attachment_idx].first_subpass = i;
93          if (i > pass->attachments[ds_attachment_idx].last_subpass)
94             pass->attachments[ds_attachment_idx].last_subpass = i;
95       }
96 
97       for (uint32_t j = 0; j < subpass->input_count; j++) {
98          uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
99          if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
100             continue;
101          if (i < pass->attachments[input_attachment_idx].first_subpass)
102             pass->attachments[input_attachment_idx].first_subpass = i;
103          if (i > pass->attachments[input_attachment_idx].last_subpass)
104             pass->attachments[input_attachment_idx].last_subpass = i;
105       }
106 
107       if (subpass->resolve_attachments) {
108          for (uint32_t j = 0; j < subpass->color_count; j++) {
109             uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
110             if (attachment_idx == VK_ATTACHMENT_UNUSED)
111                continue;
112             if (i < pass->attachments[attachment_idx].first_subpass)
113                pass->attachments[attachment_idx].first_subpass = i;
114             if (i > pass->attachments[attachment_idx].last_subpass)
115                pass->attachments[attachment_idx].last_subpass = i;
116          }
117       }
118    }
119 }
120 
121 
122 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass(VkDevice _device,const VkRenderPassCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)123 v3dv_CreateRenderPass(VkDevice _device,
124                       const VkRenderPassCreateInfo *pCreateInfo,
125                       const VkAllocationCallbacks *pAllocator,
126                       VkRenderPass *pRenderPass)
127 {
128    V3DV_FROM_HANDLE(v3dv_device, device, _device);
129    struct v3dv_render_pass *pass;
130 
131    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
132 
133    const VkRenderPassMultiviewCreateInfo *multiview_info =
134       vk_find_struct_const(pCreateInfo->pNext, RENDER_PASS_MULTIVIEW_CREATE_INFO);
135    bool multiview_enabled = multiview_info && multiview_info->subpassCount > 0;
136 
137    size_t size = sizeof(*pass);
138    size_t subpasses_offset = size;
139    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
140    size_t attachments_offset = size;
141    size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
142 
143    pass = vk_object_zalloc(&device->vk, pAllocator, size,
144                            VK_OBJECT_TYPE_RENDER_PASS);
145    if (pass == NULL)
146       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
147 
148    pass->multiview_enabled = multiview_enabled;
149    pass->attachment_count = pCreateInfo->attachmentCount;
150    pass->attachments = (void *) pass + attachments_offset;
151    pass->subpass_count = pCreateInfo->subpassCount;
152    pass->subpasses = (void *) pass + subpasses_offset;
153 
154    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
155       pass->attachments[i].desc = pCreateInfo->pAttachments[i];
156 
157    uint32_t subpass_attachment_count = 0;
158    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
159       const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
160       subpass_attachment_count += num_subpass_attachments(desc);
161    }
162 
163    if (subpass_attachment_count) {
164       const size_t subpass_attachment_bytes =
165          subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
166       pass->subpass_attachments =
167          vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
168                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
169       if (pass->subpass_attachments == NULL) {
170          vk_object_free(&device->vk, pAllocator, pass);
171          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
172       }
173    } else {
174       pass->subpass_attachments = NULL;
175    }
176 
177    struct v3dv_subpass_attachment *p = pass->subpass_attachments;
178    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
179       const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
180       struct v3dv_subpass *subpass = &pass->subpasses[i];
181 
182       subpass->input_count = desc->inputAttachmentCount;
183       subpass->color_count = desc->colorAttachmentCount;
184       if (multiview_enabled)
185          subpass->view_mask = multiview_info->pViewMasks[i];
186 
187       if (desc->inputAttachmentCount > 0) {
188          subpass->input_attachments = p;
189          p += desc->inputAttachmentCount;
190 
191          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
192             subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
193                .attachment = desc->pInputAttachments[j].attachment,
194                .layout = desc->pInputAttachments[j].layout,
195             };
196          }
197       }
198 
199       if (desc->colorAttachmentCount > 0) {
200          subpass->color_attachments = p;
201          p += desc->colorAttachmentCount;
202 
203          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
204             subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
205                .attachment = desc->pColorAttachments[j].attachment,
206                .layout = desc->pColorAttachments[j].layout,
207             };
208          }
209       }
210 
211       if (desc->pResolveAttachments) {
212          subpass->resolve_attachments = p;
213          p += desc->colorAttachmentCount;
214 
215          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
216             subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
217                .attachment = desc->pResolveAttachments[j].attachment,
218                .layout = desc->pResolveAttachments[j].layout,
219             };
220          }
221       }
222 
223       if (desc->pDepthStencilAttachment) {
224          subpass->ds_attachment = (struct v3dv_subpass_attachment) {
225             .attachment = desc->pDepthStencilAttachment->attachment,
226             .layout = desc->pDepthStencilAttachment->layout,
227          };
228 
229          /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
230           * the clear might get lost. If a subpass has this then we can't emit
231           * the clear using the TLB and we have to do it as a draw call.
232           *
233           * FIXME: separate stencil.
234           */
235          if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
236             struct v3dv_render_pass_attachment *att =
237                &pass->attachments[subpass->ds_attachment.attachment];
238             if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
239                if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
240                    att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
241                   subpass->do_depth_clear_with_draw = true;
242                } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
243                           att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
244                   subpass->do_stencil_clear_with_draw = true;
245                }
246             }
247          }
248       } else {
249          subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
250       }
251    }
252 
253    pass_find_subpass_range_for_attachments(device, pass);
254 
255    /* FIXME: handle subpass dependencies */
256 
257    *pRenderPass = v3dv_render_pass_to_handle(pass);
258 
259    return VK_SUCCESS;
260 }
261 
262 VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)263 v3dv_DestroyRenderPass(VkDevice _device,
264                        VkRenderPass _pass,
265                        const VkAllocationCallbacks *pAllocator)
266 {
267    V3DV_FROM_HANDLE(v3dv_device, device, _device);
268    V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
269 
270    if (!_pass)
271       return;
272 
273    vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
274    vk_object_free(&device->vk, pAllocator, pass);
275 }
276 
277 static void
subpass_get_granularity(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,VkExtent2D * granularity)278 subpass_get_granularity(struct v3dv_device *device,
279                         struct v3dv_render_pass *pass,
280                         uint32_t subpass_idx,
281                         VkExtent2D *granularity)
282 {
283    static const uint8_t tile_sizes[] = {
284       64, 64,
285       64, 32,
286       32, 32,
287       32, 16,
288       16, 16,
289       16,  8,
290        8,  8
291    };
292 
293    /* Our tile size depends on the number of color attachments and the maximum
294     * bpp across them.
295     */
296    assert(subpass_idx < pass->subpass_count);
297    struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
298    const uint32_t color_attachment_count = subpass->color_count;
299 
300    uint32_t max_internal_bpp = 0;
301    for (uint32_t i = 0; i < color_attachment_count; i++) {
302       uint32_t attachment_idx = subpass->color_attachments[i].attachment;
303       if (attachment_idx == VK_ATTACHMENT_UNUSED)
304          continue;
305       const VkAttachmentDescription *desc =
306          &pass->attachments[attachment_idx].desc;
307       const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
308       uint32_t internal_type, internal_bpp;
309       v3dv_X(device, get_internal_type_bpp_for_output_format)
310          (format->rt_type, &internal_type, &internal_bpp);
311 
312       max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
313    }
314 
315    uint32_t idx = 0;
316    if (color_attachment_count > 2)
317       idx += 2;
318    else if (color_attachment_count > 1)
319       idx += 1;
320 
321    idx += max_internal_bpp;
322 
323    assert(idx < ARRAY_SIZE(tile_sizes));
324    *granularity = (VkExtent2D) {
325       .width = tile_sizes[idx * 2],
326       .height = tile_sizes[idx * 2 + 1]
327    };
328 }
329 
330 VKAPI_ATTR void VKAPI_CALL
v3dv_GetRenderAreaGranularity(VkDevice _device,VkRenderPass renderPass,VkExtent2D * pGranularity)331 v3dv_GetRenderAreaGranularity(VkDevice _device,
332                               VkRenderPass renderPass,
333                               VkExtent2D *pGranularity)
334 {
335    V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
336    V3DV_FROM_HANDLE(v3dv_device, device, _device);
337 
338    *pGranularity = (VkExtent2D) {
339       .width = 64,
340       .height = 64,
341    };
342 
343    for (uint32_t i = 0; i < pass->subpass_count; i++) {
344       VkExtent2D sg;
345       subpass_get_granularity(device, pass, i, &sg);
346       pGranularity->width = MIN2(pGranularity->width, sg.width);
347       pGranularity->height = MIN2(pGranularity->height, sg.height);
348    }
349 }
350 
351 /* Checks whether the render area rectangle covers a region that is aligned to
352  * tile boundaries. This means that we are writing to all pixels covered by
353  * all tiles in that area (except for pixels on edge tiles that are outside
354  * the framebuffer dimensions).
355  *
356  * When our framebuffer is aligned to tile boundaries we know we are writing
357  * valid data to all all pixels in each tile and we can apply certain
358  * optimizations, like avoiding tile loads, since we know that none of the
359  * original pixel values in each tile for that area need to be preserved.
360  * We also use this to decide if we can use TLB clears, as these clear whole
361  * tiles so we can't use them if the render area is not aligned.
362  *
363  * Note that when an image is created it will possibly include padding blocks
364  * depending on its tiling layout. When the framebuffer dimensions are not
365  * aligned to tile boundaries then edge tiles are only partially covered by the
366  * framebuffer pixels, but tile stores still seem to store full tiles
367  * writing to the padded sections. This is important when the framebuffer
368  * is aliasing a smaller section of a larger image, as in that case the edge
369  * tiles of the framebuffer would overwrite valid pixels in the larger image.
370  * In that case, we can't flag the area as being aligned.
371  */
372 bool
v3dv_subpass_area_is_tile_aligned(struct v3dv_device * device,const VkRect2D * area,struct v3dv_framebuffer * fb,struct v3dv_render_pass * pass,uint32_t subpass_idx)373 v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
374                                   const VkRect2D *area,
375                                   struct v3dv_framebuffer *fb,
376                                   struct v3dv_render_pass *pass,
377                                   uint32_t subpass_idx)
378 {
379    assert(subpass_idx < pass->subpass_count);
380 
381    VkExtent2D granularity;
382    subpass_get_granularity(device, pass, subpass_idx, &granularity);
383 
384    return area->offset.x % granularity.width == 0 &&
385           area->offset.y % granularity.height == 0 &&
386          (area->extent.width % granularity.width == 0 ||
387           (fb->has_edge_padding &&
388            area->offset.x + area->extent.width >= fb->width)) &&
389          (area->extent.height % granularity.height == 0 ||
390           (fb->has_edge_padding &&
391            area->offset.y + area->extent.height >= fb->height));
392 }
393