1 /*
2  * Copyright (C) 2008 VMware, Inc.
3  * Copyright (C) 2014 Broadcom
4  * Copyright (C) 2018-2019 Alyssa Rosenzweig
5  * Copyright (C) 2019-2020 Collabora, Ltd.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *
26  */
27 
28 #include "util/macros.h"
29 #include "util/u_math.h"
30 #include "pan_texture.h"
31 
32 /* Generates a texture descriptor. Ideally, descriptors are immutable after the
33  * texture is created, so we can keep these hanging around in GPU memory in a
34  * dedicated BO and not have to worry. In practice there are some minor gotchas
35  * with this (the driver sometimes will change the format of a texture on the
36  * fly for compression) but it's fast enough to just regenerate the descriptor
37  * in those cases, rather than monkeypatching at drawtime.
38  *
39  * A texture descriptor consists of a 32-byte mali_texture_descriptor structure
40  * followed by a variable number of pointers. Due to this variance and
41  * potentially large size, we actually upload directly rather than returning
42  * the descriptor. Whether the user does a copy themselves or not is irrelevant
43  * to us here.
44  */
45 
46 /* Check if we need to set a custom stride by computing the "expected"
47  * stride and comparing it to what the user actually wants. Only applies
48  * to linear textures, since tiled/compressed textures have strict
49  * alignment requirements for their strides as it is */
50 
51 static bool
panfrost_needs_explicit_stride(struct panfrost_slice * slices,uint16_t width,unsigned first_level,unsigned last_level,unsigned bytes_per_pixel)52 panfrost_needs_explicit_stride(
53                 struct panfrost_slice *slices,
54                 uint16_t width,
55                 unsigned first_level, unsigned last_level,
56                 unsigned bytes_per_pixel)
57 {
58         for (unsigned l = first_level; l <= last_level; ++l) {
59                 unsigned actual = slices[l].stride;
60                 unsigned expected = u_minify(width, l) * bytes_per_pixel;
61 
62                 if (actual != expected)
63                         return true;
64         }
65 
66         return false;
67 }
68 
/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
 * texture types in the hardware, but in fact can be parametrized to have
 * various widths and heights for the so-called "stretch factor". It turns out
 * these parameters are stuffed in the bottom bits of the payload pointers.
 * This function computes the magic stuffing constant for one block dimension.
 * The constant in a given dimension is 3 bits, and two are stored
 * side-by-side for each active dimension.
 *
 * dim must lie in [4, 12]. The result is dim - 4, except that 12 yields the
 * same value as 11 (i.e. 7).
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        /* Cap at 11 so the result always fits in 3 bits */
        unsigned capped = (dim > 11) ? 11 : dim;

        return capped - 4;
}
84 
85 /* Texture addresses are tagged with information about compressed formats.
86  * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
87  * RGBA only).
88  * For ASTC, this is a "stretch factor" encoding the block size. */
89 
90 static unsigned
panfrost_compression_tag(const struct util_format_description * desc,enum mali_format format,enum mali_texture_layout layout)91 panfrost_compression_tag(
92                 const struct util_format_description *desc,
93                 enum mali_format format, enum mali_texture_layout layout)
94 {
95         if (layout == MALI_TEXTURE_AFBC)
96                 return desc->nr_channels >= 3;
97         else if (format == MALI_ASTC_2D_LDR || format == MALI_ASTC_2D_HDR)
98                 return (panfrost_astc_stretch(desc->block.height) << 3) |
99                         panfrost_astc_stretch(desc->block.width);
100         else
101                 return 0;
102 }
103 
104 
/* Cubemaps have 6 faces as "layers" in between each actual layer. Split the
 * incoming flat layer range into a face range (layer modulo 6) and a cube
 * range (layer divided by 6); first_layer/last_layer are rewritten in place.
 *
 * A range spanning several cubes is only accepted when it runs from face 0
 * to face 5. TODO: logic wrong in the asserted out cases ... can they happen,
 * perhaps from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned faces_per_cube = 6;

        *first_face = *first_layer % faces_per_cube;
        *last_face = *last_layer % faces_per_cube;

        *first_layer = *first_layer / faces_per_cube;
        *last_layer = *last_layer / faces_per_cube;

        assert((*first_layer == *last_layer) ||
               (*first_face == 0 && *last_face == 5));
}
121 
/* Following the texture descriptor is a number of pointers. How many? One per
 * (level, layer, face, sample) combination in the requested ranges, doubled
 * when every pointer is accompanied by an explicit stride. For cubes the
 * layer range is first split into faces and whole cubes. A sample count of
 * zero is treated as one. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                &first_layer, &last_layer);
        }

        /* Build the product up one dimension at a time */
        unsigned count = 1 + last_level - first_level;
        count *= 1 + last_layer - first_layer;
        count *= 1 + last_face - first_face;
        count *= MAX2(nr_samples, 1);

        return manual_stride ? (count * 2) : count;
}
148 
149 /* Conservative estimate of the size of the texture payload a priori.
150  * Average case, size equal to the actual size. Worst case, off by 2x (if
151  * a manual stride is not needed on a linear texture). Returned value
152  * must be greater than or equal to the actual size, so it's safe to use
153  * as an allocation amount */
154 
155 unsigned
panfrost_estimate_texture_payload_size(unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,enum mali_texture_type type,enum mali_texture_layout layout)156 panfrost_estimate_texture_payload_size(
157                 unsigned first_level, unsigned last_level,
158                 unsigned first_layer, unsigned last_layer,
159                 unsigned nr_samples,
160                 enum mali_texture_type type, enum mali_texture_layout layout)
161 {
162         /* Assume worst case */
163         unsigned manual_stride = (layout == MALI_TEXTURE_LINEAR);
164 
165         unsigned elements = panfrost_texture_num_elements(
166                         first_level, last_level,
167                         first_layer, last_layer,
168                         nr_samples,
169                         type == MALI_TEX_CUBE, manual_stride);
170 
171         return sizeof(mali_ptr) * elements;
172 }
173 
174 /* Bifrost requires a tile stride for tiled textures. This stride is computed
175  * as (16 * bpp * width) assuming there is at least one tile (width >= 16).
176  * Otherwise if height <= 16, the blob puts zero. Interactions with AFBC are
177  * currently unknown.
178  */
179 
180 static unsigned
panfrost_nonlinear_stride(enum mali_texture_layout layout,unsigned bytes_per_pixel,unsigned width,unsigned height)181 panfrost_nonlinear_stride(enum mali_texture_layout layout,
182                 unsigned bytes_per_pixel,
183                 unsigned width,
184                 unsigned height)
185 {
186         if (layout == MALI_TEXTURE_TILED) {
187                 return (height <= 16) ? 0 : (16 * bytes_per_pixel * ALIGN_POT(width, 16));
188         } else {
189                 unreachable("TODO: AFBC on Bifrost");
190         }
191 }
192 
193 static void
panfrost_emit_texture_payload(mali_ptr * payload,const struct util_format_description * desc,enum mali_format mali_format,enum mali_texture_type type,enum mali_texture_layout layout,unsigned width,unsigned height,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,bool manual_stride,mali_ptr base,struct panfrost_slice * slices)194 panfrost_emit_texture_payload(
195         mali_ptr *payload,
196         const struct util_format_description *desc,
197         enum mali_format mali_format,
198         enum mali_texture_type type,
199         enum mali_texture_layout layout,
200         unsigned width, unsigned height,
201         unsigned first_level, unsigned last_level,
202         unsigned first_layer, unsigned last_layer,
203         unsigned nr_samples,
204         unsigned cube_stride,
205         bool manual_stride,
206         mali_ptr base,
207         struct panfrost_slice *slices)
208 {
209         base |= panfrost_compression_tag(desc, mali_format, layout);
210 
211         /* Inject the addresses in, interleaving array indices, mip levels,
212          * cube faces, and strides in that order */
213 
214         unsigned first_face  = 0, last_face = 0, face_mult = 1;
215 
216         if (type == MALI_TEX_CUBE) {
217                 face_mult = 6;
218                 panfrost_adjust_cube_dimensions(&first_face, &last_face, &first_layer, &last_layer);
219         }
220 
221         nr_samples = MAX2(nr_samples, 1);
222 
223         unsigned idx = 0;
224 
225         for (unsigned w = first_layer; w <= last_layer; ++w) {
226                 for (unsigned l = first_level; l <= last_level; ++l) {
227                         for (unsigned f = first_face; f <= last_face; ++f) {
228                                 for (unsigned s = 0; s < nr_samples; ++s) {
229                                         payload[idx++] = base + panfrost_texture_offset(
230                                                         slices, type == MALI_TEX_3D,
231                                                         cube_stride, l, w * face_mult + f, s);
232 
233                                         if (manual_stride) {
234                                                 payload[idx++] = (layout == MALI_TEXTURE_LINEAR) ?
235                                                         slices[l].stride :
236                                                         panfrost_nonlinear_stride(layout,
237                                                                         MAX2(desc->block.bits / 8, 1),
238                                                                         u_minify(width, l),
239                                                                         u_minify(height, l));
240                                         }
241                                 }
242                         }
243                 }
244         }
245 }
246 
/* Fixed swizzles built from four 3-bit channel selectors, packed with the
 * first component in bits [2:0] and the last in bits [11:9]. The whole
 * expansion is parenthesized so the macros stay a single operand whatever
 * operators surround them at the point of use. */

/* (red, 0, 0, 1) */
#define MALI_SWIZZLE_R001 \
        ((MALI_CHANNEL_RED << 0) | \
         (MALI_CHANNEL_ZERO << 3) | \
         (MALI_CHANNEL_ZERO << 6) | \
         (MALI_CHANNEL_ONE << 9))

/* (alpha, 0, 0, 1) */
#define MALI_SWIZZLE_A001 \
        ((MALI_CHANNEL_ALPHA << 0) | \
         (MALI_CHANNEL_ZERO << 3) | \
         (MALI_CHANNEL_ZERO << 6) | \
         (MALI_CHANNEL_ONE << 9))
258 
259 
260 void
panfrost_new_texture(void * out,uint16_t width,uint16_t height,uint16_t depth,uint16_t array_size,enum pipe_format format,enum mali_texture_type type,enum mali_texture_layout layout,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,unsigned swizzle,mali_ptr base,struct panfrost_slice * slices)261 panfrost_new_texture(
262         void *out,
263         uint16_t width, uint16_t height,
264         uint16_t depth, uint16_t array_size,
265         enum pipe_format format,
266         enum mali_texture_type type,
267         enum mali_texture_layout layout,
268         unsigned first_level, unsigned last_level,
269         unsigned first_layer, unsigned last_layer,
270         unsigned nr_samples,
271         unsigned cube_stride,
272         unsigned swizzle,
273         mali_ptr base,
274         struct panfrost_slice *slices)
275 {
276         const struct util_format_description *desc =
277                 util_format_description(format);
278 
279         unsigned bytes_per_pixel = util_format_get_blocksize(format);
280 
281         enum mali_format mali_format = panfrost_pipe_format_table[desc->format].hw;
282         assert(mali_format);
283 
284         bool manual_stride = (layout == MALI_TEXTURE_LINEAR)
285                 && panfrost_needs_explicit_stride(slices, width,
286                                 first_level, last_level, bytes_per_pixel);
287 
288         struct mali_texture_descriptor descriptor = {
289                 .width = MALI_POSITIVE(u_minify(width, first_level)),
290                 .height = MALI_POSITIVE(u_minify(height, first_level)),
291                 .depth = MALI_POSITIVE(u_minify(depth, first_level)),
292                 .array_size = MALI_POSITIVE(array_size),
293                 .format = {
294                         .swizzle = (format == PIPE_FORMAT_X24S8_UINT) ?
295                                 MALI_SWIZZLE_A001 :
296                                 (format == PIPE_FORMAT_S8_UINT) ?
297                                 MALI_SWIZZLE_R001 :
298                                 panfrost_translate_swizzle_4(desc->swizzle),
299                         .format = mali_format,
300                         .srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB),
301                         .type = type,
302                         .layout = layout,
303                         .manual_stride = manual_stride,
304                         .unknown2 = 1,
305                 },
306                 .levels = last_level - first_level,
307                 .swizzle = swizzle
308         };
309 
310         memcpy(out, &descriptor, sizeof(descriptor));
311 
312         mali_ptr *payload = (mali_ptr *) (out + sizeof(struct mali_texture_descriptor));
313         panfrost_emit_texture_payload(
314                 payload,
315                 desc,
316                 mali_format,
317                 type,
318                 layout,
319                 width, height,
320                 first_level, last_level,
321                 first_layer, last_layer,
322                 nr_samples,
323                 cube_stride,
324                 manual_stride,
325                 base,
326                 slices);
327 }
328 
329 void
panfrost_new_texture_bifrost(struct bifrost_texture_descriptor * descriptor,uint16_t width,uint16_t height,uint16_t depth,uint16_t array_size,enum pipe_format format,enum mali_texture_type type,enum mali_texture_layout layout,unsigned first_level,unsigned last_level,unsigned first_layer,unsigned last_layer,unsigned nr_samples,unsigned cube_stride,unsigned swizzle,mali_ptr base,struct panfrost_slice * slices,struct panfrost_bo * payload)330 panfrost_new_texture_bifrost(
331         struct bifrost_texture_descriptor *descriptor,
332         uint16_t width, uint16_t height,
333         uint16_t depth, uint16_t array_size,
334         enum pipe_format format,
335         enum mali_texture_type type,
336         enum mali_texture_layout layout,
337         unsigned first_level, unsigned last_level,
338         unsigned first_layer, unsigned last_layer,
339         unsigned nr_samples,
340         unsigned cube_stride,
341         unsigned swizzle,
342         mali_ptr base,
343         struct panfrost_slice *slices,
344         struct panfrost_bo *payload)
345 {
346         const struct util_format_description *desc =
347                 util_format_description(format);
348 
349         enum mali_format mali_format = panfrost_pipe_format_table[desc->format].hw;
350         assert(mali_format);
351 
352         panfrost_emit_texture_payload(
353                 (mali_ptr *) payload->cpu,
354                 desc,
355                 mali_format,
356                 type,
357                 layout,
358                 width, height,
359                 first_level, last_level,
360                 first_layer, last_layer,
361                 nr_samples,
362                 cube_stride,
363                 true, /* Stride explicit on Bifrost */
364                 base,
365                 slices);
366 
367         descriptor->format_unk = 0x2;
368         descriptor->type = type;
369         descriptor->format = mali_format;
370         descriptor->srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
371         descriptor->format_unk3 = 0x0;
372         descriptor->width = MALI_POSITIVE(u_minify(width, first_level));
373         descriptor->height = MALI_POSITIVE(u_minify(height, first_level));
374         descriptor->swizzle = swizzle;
375         descriptor->layout = layout;
376         descriptor->levels = last_level - first_level;
377         descriptor->unk1 = 0x0;
378         descriptor->levels_unk = 0;
379         descriptor->level_2 = last_level - first_level;
380         descriptor->payload = payload->gpu;
381         descriptor->array_size = MALI_POSITIVE(array_size);
382         descriptor->unk4 = 0x0;
383         descriptor->depth = MALI_POSITIVE(u_minify(depth, first_level));
384         descriptor->unk5 = 0x0;
385 }
386 
387 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
388  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
389  * This feature is also known as "transaction elimination". */
390 
391 #define CHECKSUM_TILE_WIDTH 16
392 #define CHECKSUM_TILE_HEIGHT 16
393 #define CHECKSUM_BYTES_PER_TILE 8
394 
395 unsigned
panfrost_compute_checksum_size(struct panfrost_slice * slice,unsigned width,unsigned height)396 panfrost_compute_checksum_size(
397         struct panfrost_slice *slice,
398         unsigned width,
399         unsigned height)
400 {
401         unsigned aligned_width = ALIGN_POT(width, CHECKSUM_TILE_WIDTH);
402         unsigned aligned_height = ALIGN_POT(height, CHECKSUM_TILE_HEIGHT);
403 
404         unsigned tile_count_x = aligned_width / CHECKSUM_TILE_WIDTH;
405         unsigned tile_count_y = aligned_height / CHECKSUM_TILE_HEIGHT;
406 
407         slice->checksum_stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
408 
409         return slice->checksum_stride * tile_count_y;
410 }
411 
412 unsigned
panfrost_get_layer_stride(struct panfrost_slice * slices,bool is_3d,unsigned cube_stride,unsigned level)413 panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
414 {
415         return is_3d ? slices[level].size0 : cube_stride;
416 }
417 
418 /* Computes the offset into a texture at a particular level/face. Add to
419  * the base address of a texture to get the address to that level/face */
420 
421 unsigned
panfrost_texture_offset(struct panfrost_slice * slices,bool is_3d,unsigned cube_stride,unsigned level,unsigned face,unsigned sample)422 panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face, unsigned sample)
423 {
424         unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
425         return slices[level].offset + (face * layer_stride) + (sample * slices[level].size0);
426 }
427