/*
 * Copyright © 2021 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

#include "vk_format_info.h"

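/* Parameters needed by emit_rcl_prologue() to program hardware clears:
 * the clear value itself, the image being cleared (NULL when filling a
 * buffer), the aspects to clear, and the mip level, which is needed to
 * compute the UIF padding for the cleared render target.
 */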
struct rcl_clear_info {
   const union v3dv_clear_value *clear_value;
   struct v3dv_image *image;
   VkImageAspectFlags aspects;
   uint32_t level;
};

static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
                  struct v3dv_meta_framebuffer *fb,
                  const struct rcl_clear_info *clear_info)
{
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    tiling->layers * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   if (job->cmd_buffer->state.oom)
      return NULL;

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = tiling->width;
      config.image_height_pixels = tiling->height;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = tiling->msaa;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
      config.internal_depth_type = fb->internal_depth_type;
   }

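   /* If the image's UIF padding exceeds what the hardware would implicitly
    * derive from the frame height by 15 or more UIF blocks, it seems we
    * have to program the padding explicitly, which is done through the
    * uif_padded_height field of CLEAR_COLORS_PART3 below.
    */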
   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
      uint32_t clear_pad = 0;
      if (clear_info->image) {
         const struct v3dv_image *image = clear_info->image;
         const struct v3d_resource_slice *slice =
            &image->slices[clear_info->level];
         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
             slice->tiling == V3D_TILING_UIF_XOR) {
            int uif_block_height = v3d_utile_height(image->cpp) * 2;

            uint32_t implicit_padded_height =
               align(tiling->height, uif_block_height) / uif_block_height;

            if (slice->padded_height_of_output_image_in_uif_blocks -
                implicit_padded_height >= 15) {
               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
            }
         }
      }

      const uint32_t *color = &clear_info->clear_value->color[0];
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = color[0];
         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
         clear.render_target_number = 0;
      };

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((color[1] >> 24) | (color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
            clear.render_target_number = 0;
         };
      }

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = color[3] >> 16;
            clear.render_target_number = 0;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = tiling->internal_bpp;
      rt.render_target_0_internal_type = fb->internal_type;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   return rcl;
}

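/* Emits the per-frame RCL setup: the tile list base address for the first
 * layer to process (each layer's tile list takes 64 bytes per draw tile)
 * and the supertile configuration. It also emits the dummy tile store loop
 * for the GFXH-1742 workaround, which is where clears happen when requested.
 */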
static void
emit_frame_setup(struct v3dv_job *job,
                 uint32_t min_layer,
                 const union v3dv_clear_value *clear_value)
{
   v3dv_return_if_oom(NULL, job);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t tile_alloc_offset =
      64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Implement the GFXH-1742 workaround. Also, if we are clearing, we have
    * to do it here.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      if (clear_value && i == 0) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}

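/* Walks every supertile covered by the framebuffer and emits its
 * coordinates, which is what actually triggers rendering of the generic
 * tile list for each supertile.
 */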
static void
emit_supertile_coordinates(struct v3dv_job *job,
                           struct v3dv_meta_framebuffer *framebuffer)
{
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t min_y = framebuffer->min_y_supertile;
   const uint32_t max_y = framebuffer->max_y_supertile;
   const uint32_t min_x = framebuffer->min_x_supertile;
   const uint32_t max_x = framebuffer->max_x_supertile;

   for (int y = min_y; y <= max_y; y++) {
      for (int x = min_x; x <= max_x; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}

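/* Linear (raster-order) tile buffer loads and stores are how we access
 * buffer memory as if it were a render target, which is the basis for the
 * buffer copy and fill paths below.
 */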
static void
emit_linear_load(struct v3dv_cl *cl,
                 uint32_t buffer,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 uint32_t stride,
                 uint32_t format)
{
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(bo, offset);
      load.input_image_format = format;
      load.memory_format = V3D_TILING_RASTER;
      load.height_in_ub_or_stride = stride;
      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_linear_store(struct v3dv_cl *cl,
                  uint32_t buffer,
                  struct v3dv_bo *bo,
                  uint32_t offset,
                  uint32_t stride,
                  bool msaa,
                  uint32_t format)
{
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = RENDER_TARGET_0;
      store.address = v3dv_cl_address(bo, offset);
      store.clear_buffer_being_stored = false;
      store.output_image_format = format;
      store.memory_format = V3D_TILING_RASTER;
      store.height_in_ub_or_stride = stride;
      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
                                   V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

/* This chooses a tile buffer format that is appropriate for the copy
 * operation. Typically, this is the image's render target type; however,
 * if we are copying depth/stencil to/from a buffer, the hardware can't do
 * raster loads/stores, so we need to load and store to/from a tile color
 * buffer using a compatible color format.
 */
static uint32_t
choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
                  VkImageAspectFlags aspect,
                  bool for_store,
                  bool is_copy_to_buffer,
                  bool is_copy_from_buffer)
{
   if (is_copy_to_buffer || is_copy_from_buffer) {
      switch (framebuffer->vk_format) {
      case VK_FORMAT_D16_UNORM:
         return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
      case VK_FORMAT_D32_SFLOAT:
         return V3D_OUTPUT_IMAGE_FORMAT_R32F;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* When storing the stencil aspect of a combined depth/stencil image
          * to a buffer, the Vulkan spec states that the output buffer must
          * have packed stencil values, so we choose an R8UI format for our
          * store outputs. For the load input we still want RGBA8UI, since the
          * source image contains 4 channels (including the 3 channels
          * containing the 24-bit depth value).
          *
          * When loading the stencil aspect of a combined depth/stencil image
          * from a buffer, we read packed 8-bit stencil values from the buffer
          * that we need to put into the LSB of the 32-bit format (the R
          * channel), so we use R8UI. For the store, if we used R8UI then we
          * would write 8-bit stencil values consecutively over the depth
          * channels, so we need to use RGBA8UI. This will write each stencil
          * value in its correct position, but will overwrite the depth values
          * (channels G, B, A) with undefined values. To fix this, we will
          * have to restore the depth aspect from the Z tile buffer, which we
          * should pre-load from the image before the store.
          */
         if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
            return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
         } else {
            assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
            if (is_copy_to_buffer) {
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
            } else {
               assert(is_copy_from_buffer);
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_R8UI;
            }
         }
      default: /* Color formats */
         return framebuffer->format->rt_type;
      }
   } else {
      return framebuffer->format->rt_type;
   }
}

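/* A format needs an R/B swap when its first channel maps to the Z (third)
 * swizzle component, i.e. BGRA-style formats that store blue where the
 * hardware expects red.
 */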
static inline bool
format_needs_rb_swap(struct v3dv_device *device,
                     VkFormat format)
{
   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
   return swizzle[0] == PIPE_SWIZZLE_Z;
}

static void
emit_image_load(struct v3dv_device *device,
                struct v3dv_cl *cl,
                struct v3dv_meta_framebuffer *framebuffer,
                struct v3dv_image *image,
                VkImageAspectFlags aspect,
                uint32_t layer,
                uint32_t mip_level,
                bool is_copy_to_buffer,
                bool is_copy_from_buffer)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   /* For image to/from buffer copies we always load to and store from RT0,
    * even for depth/stencil aspects, because the hardware can't do raster
    * stores or loads from/to the depth/stencil tile buffers.
    */
   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = load_to_color_tlb ?
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      load.address = v3dv_cl_address(image->mem->bo, layer_offset);

      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
                                                  is_copy_to_buffer,
                                                  is_copy_from_buffer);
      load.memory_format = slice->tiling;

      /* When copying depth/stencil images to a buffer, for D24 formats
       * Vulkan expects the depth value in the LSB bits of each 32-bit pixel.
       * Unfortunately, the hardware seems to put the S8/X8 bits there and
       * the depth bits in the MSB. To work around that we can reverse the
       * channel order and then swap the R/B channels to get what we want.
       *
       * NOTE: reversing and swapping only gets us the behavior we want if
       * the operations happen in that exact order, which seems to be the
       * case when done on the tile buffer load operations. On the store, it
       * seems the order is not the same. The order on the store is probably
       * reversed so that reversing and swapping on both the load and the
       * store preserves the original order of the channels in memory.
       *
       * Notice that we only need to do this when copying to a buffer, where
       * depth and stencil aspects are copied as separate regions and the
       * spec expects them to be tightly packed.
       */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_to_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* This is not a raw data copy (i.e. we are clearing the image),
          * so we need to make sure we respect the format swizzle.
          */
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      }

      load.r_b_swap = needs_rb_swap;
      load.channel_reverse = needs_chan_reverse;

      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         load.height_in_ub_or_stride = slice->stride;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_image_store(struct v3dv_device *device,
                 struct v3dv_cl *cl,
                 struct v3dv_meta_framebuffer *framebuffer,
                 struct v3dv_image *image,
                 VkImageAspectFlags aspect,
                 uint32_t layer,
                 uint32_t mip_level,
                 bool is_copy_to_buffer,
                 bool is_copy_from_buffer,
                 bool is_multisample_resolve)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = store_from_color_tlb ?
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      /* See rationale in emit_image_load() */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_from_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      }

      store.r_b_swap = needs_rb_swap;
      store.channel_reverse = needs_chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else if (is_multisample_resolve)
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

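/* All the *_per_tile_list() helpers below share the same structure: they
 * build a generic tile list in the job's indirect CL (implicit tile
 * coordinates, loads, a branch to the implicit tile list, stores,
 * end-of-tile marker, return), and then emit a branch to it from the RCL
 * so it runs once per supertile emitted by emit_supertile_coordinates().
 */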
static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
                                        uint32_t layer_offset,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* Load image to TLB */
   assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->imageSubresource.layerCount) ||
          layer_offset < image->vk.extent.depth);

   const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->imageSubresource.baseArrayLayer + layer_offset :
      region->imageOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, image,
                   region->imageSubresource.aspectMask,
                   image_layer,
                   region->imageSubresource.mipLevel,
                   true, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy from compressed format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   /* If we are storing stencil from a combined depth/stencil format the
    * Vulkan spec states that the output buffer must have packed stencil
    * values, where each stencil value is 1 byte.
    */
   uint32_t cpp =
      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
      1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
                            height * buffer_stride * layer_offset;

   uint32_t format = choose_tlb_format(framebuffer,
                                       region->imageSubresource.aspectMask,
                                       true, true, false);
   bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;

   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
                     buffer_offset, buffer_stride, msaa, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2KHR *region)
{
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
                                           image, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_image *image,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
                                       struct v3dv_meta_framebuffer *framebuffer,
                                       struct v3dv_image *dst,
                                       struct v3dv_image *src,
                                       uint32_t layer_offset,
                                       const VkImageResolve2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, true);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_resolve_image_layer(struct v3dv_job *job,
                         struct v3dv_image *dst,
                         struct v3dv_image *src,
                         struct v3dv_meta_framebuffer *framebuffer,
                         uint32_t layer,
                         const VkImageResolve2KHR *region)
{
   emit_resolve_image_layer_per_tile_list(job, framebuffer,
                                          dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
                                  struct v3dv_image *dst,
                                  struct v3dv_image *src,
                                  struct v3dv_meta_framebuffer *framebuffer,
                                  const VkImageResolve2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *dst,
                               struct v3dv_bo *src,
                               uint32_t dst_offset,
                               uint32_t src_offset,
                               uint32_t stride,
                               uint32_t format)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0,
                     dst, dst_offset, stride, false, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

void
v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
                            struct v3dv_bo *dst,
                            struct v3dv_bo *src,
                            uint32_t dst_offset,
                            uint32_t src_offset,
                            struct v3dv_meta_framebuffer *framebuffer,
                            uint32_t format,
                            uint32_t item_size)
{
   const uint32_t stride = job->frame_tiling.width * item_size;
   emit_copy_buffer_per_tile_list(job, dst, src,
                                  dst_offset, src_offset,
                                  stride, format);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *dst,
                                struct v3dv_bo *src,
                                uint32_t dst_offset,
                                uint32_t src_offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t format,
                                uint32_t item_size)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
                               framebuffer, format, item_size);

   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
                                    struct v3dv_meta_framebuffer *framebuffer,
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
                                    uint32_t layer_offset,
                                    const VkImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_image_layer(struct v3dv_job *job,
                      struct v3dv_image *dst,
                      struct v3dv_image *src,
                      struct v3dv_meta_framebuffer *framebuffer,
                      uint32_t layer,
                      const VkImageCopy2KHR *region)
{
   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer,
                                       region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
                               struct v3dv_image *dst,
                               struct v3dv_image *src,
                               struct v3dv_meta_framebuffer *framebuffer,
                               const VkImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

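/* Emits a job for the TFU (Texture Formatting Unit), which can convert
 * between memory tilings (raster, linear-tile, UIF) without going through
 * a render pass, and is typically a cheaper path for eligible copies.
 */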
void
v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_image *dst,
                        uint32_t dst_mip_level,
                        uint32_t dst_layer,
                        struct v3dv_image *src,
                        uint32_t src_mip_level,
                        uint32_t src_layer,
                        uint32_t width,
                        uint32_t height,
                        const struct v3dv_format *format)
{
   const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
   const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];

   assert(dst->mem && dst->mem->bo);
   const struct v3dv_bo *dst_bo = dst->mem->bo;

   assert(src->mem && src->mem->bo);
   const struct v3dv_bo *src_bo = src->mem->bo;

   struct drm_v3d_submit_tfu tfu = {
      .ios = (height << 16) | width,
      .bo_handles = {
         dst_bo->handle,
         src_bo->handle != dst_bo->handle ? src_bo->handle : 0
      },
   };

   const uint32_t src_offset =
      src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
   tfu.iia |= src_offset;

   uint32_t icfg;
   if (src_slice->tiling == V3D_TILING_RASTER) {
      icfg = V3D_TFU_ICFG_FORMAT_RASTER;
   } else {
      icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
             (src_slice->tiling - V3D_TILING_LINEARTILE);
   }
   tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;

   const uint32_t dst_offset =
      dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
   tfu.ioa |= dst_offset;

   tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
               (dst_slice->tiling - V3D_TILING_LINEARTILE)) <<
              V3D_TFU_IOA_FORMAT_SHIFT;
   tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;

   switch (src_slice->tiling) {
   case V3D_TILING_UIF_NO_XOR:
   case V3D_TILING_UIF_XOR:
      tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
      break;
   case V3D_TILING_RASTER:
      tfu.iis |= src_slice->stride / src->cpp;
      break;
   default:
      break;
   }

   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
    * OPAD field for the destination (how many extra UIF blocks beyond
    * those necessary to cover the height).
    */
   if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR ||
       dst_slice->tiling == V3D_TILING_UIF_XOR) {
      uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
      uint32_t implicit_padded_height = align(height, uif_block_h);
      uint32_t icfg =
         (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
      tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
   }

   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}

static void
emit_clear_image_layer_per_tile_list(struct v3dv_job *job,
                                     struct v3dv_meta_framebuffer *framebuffer,
                                     struct v3dv_image *image,
                                     VkImageAspectFlags aspects,
                                     uint32_t layer,
                                     uint32_t level)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_image_store(job->device, cl, framebuffer, image, aspects,
                    layer, level, false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_clear_image_layers(struct v3dv_job *job,
                        struct v3dv_image *image,
                        struct v3dv_meta_framebuffer *framebuffer,
                        VkImageAspectFlags aspects,
                        uint32_t min_layer,
                        uint32_t max_layer,
                        uint32_t level)
{
   for (uint32_t layer = min_layer; layer < max_layer; layer++) {
      emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
                                           layer, level);
      emit_supertile_coordinates(job, framebuffer);
   }
}

void
v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
                                struct v3dv_image *image,
                                struct v3dv_meta_framebuffer *framebuffer,
                                const union v3dv_clear_value *clear_value,
                                VkImageAspectFlags aspects,
                                uint32_t min_layer,
                                uint32_t max_layer,
                                uint32_t level)
{
   const struct rcl_clear_info clear_info = {
      .clear_value = clear_value,
      .image = image,
      .aspects = aspects,
      .level = level,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, clear_value);
   emit_clear_image_layers(job, image, framebuffer, aspects,
                           min_layer, max_layer, level);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *bo,
                               uint32_t offset,
                               uint32_t stride)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

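/* The fill renders the clear color set up in emit_rcl_prologue() and stores
 * it to the buffer as RGBA8UI, so each pixel covers 4 bytes and a row of
 * the frame covers width * 4 bytes of the buffer.
 */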
static void
emit_fill_buffer(struct v3dv_job *job,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 struct v3dv_meta_framebuffer *framebuffer)
{
   const uint32_t stride = job->frame_tiling.width * 4;
   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *bo,
                                uint32_t offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t data)
{
   const union v3dv_clear_value clear_value = {
      .color = { data, 0, 0, 0 },
   };

   const struct rcl_clear_info clear_info = {
      .clear_value = &clear_value,
      .image = NULL,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
      .level = 0,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, &clear_value);
   emit_fill_buffer(job, bo, offset, framebuffer);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        uint32_t layer,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
   assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer < imgrsc->layerCount) ||
          layer < image->vk.extent.depth);

   /* Load TLB from buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy to compressed format using a compatible format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
                  1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset =
      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;

   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
                                       false, false, true);

   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
                    buffer_offset, buffer_stride, format);

   /* Because we can't do raster loads/stores of Z/S formats we need to
    * use a color tile buffer with a compatible RGBA color format instead.
    * However, when we are uploading a single aspect to a combined
    * depth/stencil image we have the problem that our tile buffer stores
    * don't allow us to mask out the other aspect, so we always write all
    * four RGBA channels to the image and we end up overwriting that other
    * aspect with undefined values. To work around that, we first load the
    * aspect we are not copying from the image memory into a proper Z/S tile
    * buffer. Then we do our store from the color buffer for the aspect we
    * are copying, and after that, we do another store from the Z/S tile
    * buffer to restore the other aspect to its original value.
    */
   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_load(job->device, cl, framebuffer, image,
                         VK_IMAGE_ASPECT_STENCIL_BIT,
                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                         false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_load(job->device, cl, framebuffer, image,
                         VK_IMAGE_ASPECT_DEPTH_BIT,
                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                         false, false);
      }
   }

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to image */
   emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
                    imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                    false, true, false);

   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_store(job->device, cl, framebuffer, image,
                          VK_IMAGE_ASPECT_STENCIL_BIT,
                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                          false, false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_store(job->device, cl, framebuffer, image,
                          VK_IMAGE_ASPECT_DEPTH_BIT,
                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                          false, false, false);
      }
   }

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_buffer_to_layer(struct v3dv_job *job,
                          struct v3dv_image *image,
                          struct v3dv_buffer *buffer,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2KHR *region)
{
   emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
                                           layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
                                         struct v3dv_image *image,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Figure out a TLB size configuration for a number of pixels to process.
 * Beware that we can't "render" more than 4096x4096 pixels in a single job;
 * if the pixel count is larger than that, the caller might need to split
 * the job and call this function multiple times.
 */
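/* For example (a hypothetical input): num_pixels = 10000 starts as 10000x1
 * and is rebalanced to 5000x2, 2500x4, 1250x8 and finally 625x16, which
 * fits the 4096-pixel dimension limit while covering all 10000 pixels.
 */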
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t max_dim_pixels = 4096;
   const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;

   uint32_t w, h;
   if (num_pixels > max_pixels) {
      w = max_dim_pixels;
      h = max_dim_pixels;
   } else {
      w = num_pixels;
      h = 1;
      while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
         w >>= 1;
         h <<= 1;
      }
   }
   assert(w <= max_dim_pixels && h <= max_dim_pixels);
   assert(w * h <= num_pixels);
   assert(w > 0 && h > 0);

   *width = w;
   *height = h;
}

struct v3dv_job *
v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                       struct v3dv_bo *dst,
                       uint32_t dst_offset,
                       struct v3dv_bo *src,
                       uint32_t src_offset,
                       const VkBufferCopy2KHR *region)
{
   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;

   /* Select an appropriate pixel format for the copy operation based on the
    * size to copy and the alignment of the source and destination offsets.
    */
   src_offset += region->srcOffset;
   dst_offset += region->dstOffset;
   uint32_t item_size = 4;
   while (item_size > 1 &&
          (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
      item_size /= 2;
   }

   while (item_size > 1 && region->size % item_size != 0)
      item_size /= 2;
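   /* For example (hypothetical values): with src_offset = 6, dst_offset = 10
    * and size = 24, 4-byte items fail the alignment check, so we fall back
    * to 2-byte items (RG8UI below) and copy 12 items.
    */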

   assert(region->size % item_size == 0);
   uint32_t num_items = region->size / item_size;
   assert(num_items > 0);

   uint32_t format;
   VkFormat vk_format;
   switch (item_size) {
   case 4:
      format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      vk_format = VK_FORMAT_R8G8B8A8_UINT;
      break;
   case 2:
      format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
      vk_format = VK_FORMAT_R8G8_UINT;
      break;
   default:
      format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
      vk_format = VK_FORMAT_R8_UINT;
      break;
   }

   struct v3dv_job *job = NULL;
   while (num_items > 0) {
      job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return NULL;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);

      struct v3dv_meta_framebuffer framebuffer;
      v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
                                  &job->frame_tiling);

      v3dX(job_emit_binning_flush)(job);

      v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
                                      &framebuffer, format, item_size);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * item_size;
      num_items -= items_copied;
      src_offset += bytes_copied;
      dst_offset += bytes_copied;
   }

   return job;
}

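/* Fills a buffer with a 32-bit value by treating it as a framebuffer whose
 * clear color packs the value into 4 bytes per pixel; size must therefore
 * be a multiple of 4.
 */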
void
v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                       struct v3dv_bo *bo,
                       uint32_t offset,
                       uint32_t size,
                       uint32_t data)
{
   assert(size > 0 && size % 4 == 0);
   assert(offset + size <= bo->size);

   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
   uint32_t num_items = size / 4;

   while (num_items > 0) {
      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);

      struct v3dv_meta_framebuffer framebuffer;
      v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
                                  internal_type, &job->frame_tiling);

      v3dX(job_emit_binning_flush)(job);

      v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * 4;
      num_items -= items_copied;
      offset += bytes_copied;
   }
}

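/* Initializes the meta framebuffer state from the job's frame tiling: the
 * supertile range covering the whole frame plus the format information the
 * emission helpers above need.
 */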
void
v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
                            VkFormat vk_format,
                            uint32_t internal_type,
                            const struct v3dv_frame_tiling *tiling)
{
   fb->internal_type = internal_type;

   /* Supertile coverage always starts at 0,0 */
   uint32_t supertile_w_in_pixels =
      tiling->tile_width * tiling->supertile_width;
   uint32_t supertile_h_in_pixels =
      tiling->tile_height * tiling->supertile_height;

   fb->min_x_supertile = 0;
   fb->min_y_supertile = 0;
   fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
   fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;

   fb->vk_format = vk_format;
   fb->format = v3dX(get_format)(vk_format);

   fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
   if (vk_format_is_depth_or_stencil(vk_format))
      fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
}