/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2015-2021 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "util/u_memory.h"
#include "radv_cs.h"
#include "radv_private.h"
#include "sid.h"

static bool
radv_translate_format_to_hw(struct radeon_info *info, VkFormat format, unsigned *hw_fmt,
                            unsigned *hw_type)
{
   const struct util_format_description *desc = vk_format_description(format);
   *hw_fmt = radv_translate_colorformat(format);

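   /* Find the first channel that is not VOID; its type determines the HW number format. */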
   int firstchan;
   for (firstchan = 0; firstchan < 4; firstchan++) {
      if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
         break;
      }
   }
   if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
      *hw_type = V_028C70_NUMBER_FLOAT;
   } else {
      *hw_type = V_028C70_NUMBER_UNORM;
      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
         *hw_type = V_028C70_NUMBER_SRGB;
      else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
         if (desc->channel[firstchan].pure_integer) {
            *hw_type = V_028C70_NUMBER_SINT;
         } else {
            assert(desc->channel[firstchan].normalized);
            *hw_type = V_028C70_NUMBER_SNORM;
         }
      } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         if (desc->channel[firstchan].pure_integer) {
            *hw_type = V_028C70_NUMBER_UINT;
         } else {
            assert(desc->channel[firstchan].normalized);
            *hw_type = V_028C70_NUMBER_UNORM;
         }
      } else {
         return false;
      }
   }
   return true;
}

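/* Copy a single-plane image to a buffer with the SDMA engine (SDMA v4 on GFX9,
 * v5 on GFX10+). Returns false when the copy cannot be encoded with SDMA
 * packets, so the caller can fall back to another copy path.
 */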
static bool
radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                     struct radv_buffer *buffer,
                                     const VkBufferImageCopy2KHR *region)
{
   assert(image->plane_count == 1);
   struct radv_device *device = cmd_buffer->device;
   unsigned bpp = image->planes[0].surface.bpe;
   uint64_t dst_address = buffer->bo->va;
   uint64_t src_address = image->bo->va + image->planes[0].surface.u.gfx9.surf_offset;
   unsigned src_pitch = image->planes[0].surface.u.gfx9.surf_pitch;
   unsigned copy_width = DIV_ROUND_UP(image->info.width, image->planes[0].surface.blk_w);
   unsigned copy_height = DIV_ROUND_UP(image->info.height, image->planes[0].surface.blk_h);
   bool tmz = false;

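   /* SDMA IBs must be padded with NOPs to the dword alignment required for the DMA ring. */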
   uint32_t ib_pad_dw_mask = cmd_buffer->device->physical_device->rad_info.ib_pad_dw_mask[RING_DMA];

   /* Linear -> linear sub-window copy. */
   if (image->planes[0].surface.is_linear) {
      ASSERTED unsigned cdw_max =
         radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, align(8, ib_pad_dw_mask + 1));
      unsigned bytes = src_pitch * copy_height * bpp;

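      /* The linear COPY packet only encodes a 22-bit byte count; larger copies
       * cannot be expressed here, so bail out and let the caller fall back.
       */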
      if (!(bytes < (1u << 22)))
         return false;

      radeon_emit(cmd_buffer->cs, 0x00000000);

      src_address += image->planes[0].surface.u.gfx9.offset[0];

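      /* COPY LINEAR packet: header, byte count, a parameter dword (zero here),
       * then the 64-bit source and destination GPU addresses.
       */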
      radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
                                                  CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
      radeon_emit(cmd_buffer->cs, bytes);
      radeon_emit(cmd_buffer->cs, 0);
      radeon_emit(cmd_buffer->cs, src_address);
      radeon_emit(cmd_buffer->cs, src_address >> 32);
      radeon_emit(cmd_buffer->cs, dst_address);
      radeon_emit(cmd_buffer->cs, dst_address >> 32);

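      /* Pad the IB with SDMA NOPs up to the required alignment. */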
      while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
         radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);

      assert(cmd_buffer->cs->cdw <= cdw_max);

      return true;
   }
   /* Tiled -> linear sub-window copy. */
   else {
      unsigned tiled_width = copy_width;
      unsigned tiled_height = copy_height;
      unsigned linear_pitch = region->bufferRowLength;
      unsigned linear_slice_pitch = region->bufferRowLength * copy_height;
      uint64_t tiled_address = src_address;
      uint64_t linear_address = dst_address;
      bool is_v5 = device->physical_device->rad_info.chip_class >= GFX10;
      /* Only SDMA 5 supports DCC with SDMA */
      bool dcc = radv_dcc_enabled(image, 0) && is_v5;

      /* Check if everything fits into the bitfields */
      if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) && linear_pitch < (1 << 14) &&
            linear_slice_pitch < (1 << 28) && copy_width < (1 << 14) && copy_height < (1 << 14)))
         return false;

      ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
                                                     align(15 + dcc * 3, ib_pad_dw_mask + 1));

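      /* Tiled sub-window COPY packet: header with the DCC flag, the tiled
       * surface address, extent and swizzle information, then the linear
       * address, pitches and the copy window extent.
       */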
      radeon_emit(cmd_buffer->cs, 0x00000000);
      radeon_emit(cmd_buffer->cs,
                  CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
                                  (tmz ? 4 : 0)) |
                     dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 |
                     1u << 31);
      radeon_emit(cmd_buffer->cs,
                  (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
      radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32));
      radeon_emit(cmd_buffer->cs, 0);
      radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16));
      radeon_emit(cmd_buffer->cs, (tiled_height - 1));
      radeon_emit(
         cmd_buffer->cs,
         util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
            image->planes[0].surface.u.gfx9.resource_type << 9 |
            (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch)
               << 16);
      radeon_emit(cmd_buffer->cs, (uint32_t)linear_address);
      radeon_emit(cmd_buffer->cs, (uint32_t)(linear_address >> 32));
      radeon_emit(cmd_buffer->cs, 0);
      radeon_emit(cmd_buffer->cs, ((linear_pitch - 1) << 16));
      radeon_emit(cmd_buffer->cs, linear_slice_pitch - 1);
      radeon_emit(cmd_buffer->cs, (copy_width - 1) | ((copy_height - 1) << 16));
      radeon_emit(cmd_buffer->cs, 0);

      if (dcc) {
         unsigned hw_fmt, hw_type;
         uint64_t md_address = tiled_address + image->planes[0].surface.meta_offset;

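         /* The metadata dwords below need the HW color format and number type
          * corresponding to the Vulkan format.
          */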
         radv_translate_format_to_hw(&device->physical_device->rad_info, image->vk_format, &hw_fmt,
                                     &hw_type);

         /* Add metadata */
         radeon_emit(cmd_buffer->cs, (uint32_t)md_address);
         radeon_emit(cmd_buffer->cs, (uint32_t)(md_address >> 32));
         radeon_emit(cmd_buffer->cs,
                     hw_fmt | vi_alpha_is_on_msb(device, image->vk_format) << 8 | hw_type << 9 |
                        image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
                        V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 |
                        image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
      }

      while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
         radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);

      assert(cmd_buffer->cs->cdw <= cdw_max);

      return true;
   }

   return false;
}

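/* Public entry point: copy an image to a buffer with SDMA packets.
 * Only GFX9 and newer are supported (see the assert below).
 */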
bool
radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                     struct radv_buffer *buffer, const VkBufferImageCopy2KHR *region)
{
   assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9);
   return radv_sdma_v4_v5_copy_image_to_buffer(cmd_buffer, image, buffer, region);
}