1 /*
2  * Copyright (C) 2008 VMware, Inc.
3  * Copyright (C) 2014 Broadcom
4  * Copyright (C) 2018-2019 Alyssa Rosenzweig
5  * Copyright (C) 2019-2020 Collabora, Ltd.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24  * SOFTWARE.
25  *
26  */
27 
28 #include "util/macros.h"
29 #include "util/u_math.h"
30 #include "pan_texture.h"
31 #include "panfrost-quirks.h"
32 
33 #ifndef PAN_ARCH
34 
35 /* Generates a texture descriptor. Ideally, descriptors are immutable after the
36  * texture is created, so we can keep these hanging around in GPU memory in a
37  * dedicated BO and not have to worry. In practice there are some minor gotchas
38  * with this (the driver sometimes will change the format of a texture on the
39  * fly for compression) but it's fast enough to just regenerate the descriptor
40  * in those cases, rather than monkeypatching at drawtime. A texture descriptor
41  * consists of a 32-byte header followed by pointers.
42  */
43 
44 /* List of supported modifiers, in descending order of preference. AFBC is
45  * faster than u-interleaved tiling which is faster than linear. Within AFBC,
46  * enabling the YUV-like transform is typically a win where possible. */
47 
48 uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
49         DRM_FORMAT_MOD_ARM_AFBC(
50                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
51                 AFBC_FORMAT_MOD_SPARSE |
52                 AFBC_FORMAT_MOD_YTR),
53 
54         DRM_FORMAT_MOD_ARM_AFBC(
55                 AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
56                 AFBC_FORMAT_MOD_SPARSE),
57 
58         DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
59         DRM_FORMAT_MOD_LINEAR
60 };
61 
62 /* If not explicitly, line stride is calculated for block-based formats as
63  * (ceil(width / block_width) * block_size). As a special case, this is left
64  * zero if there is only a single block vertically. So, we have a helper to
65  * extract the dimensions of a block-based format and use that to calculate the
66  * line stride as such.
67  */
68 
69 unsigned
panfrost_block_dim(uint64_t modifier,bool width,unsigned plane)70 panfrost_block_dim(uint64_t modifier, bool width, unsigned plane)
71 {
72         if (!drm_is_afbc(modifier)) {
73                 assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
74                 return 16;
75         }
76 
77         switch (modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
78         case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
79                 return 16;
80         case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
81                 return width ? 32 : 8;
82         case AFBC_FORMAT_MOD_BLOCK_SIZE_64x4:
83                 return width ? 64 : 4;
84         case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4:
85                 return plane ? (width ? 64 : 4) : (width ? 32 : 8);
86         default:
87                 unreachable("Invalid AFBC block size");
88         }
89 }
90 
91 /* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
92  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
93  * This feature is also known as "transaction elimination". */
94 
95 #define CHECKSUM_TILE_WIDTH 16
96 #define CHECKSUM_TILE_HEIGHT 16
97 #define CHECKSUM_BYTES_PER_TILE 8
98 
99 unsigned
panfrost_compute_checksum_size(struct pan_image_slice_layout * slice,unsigned width,unsigned height)100 panfrost_compute_checksum_size(
101         struct pan_image_slice_layout *slice,
102         unsigned width,
103         unsigned height)
104 {
105         unsigned tile_count_x = DIV_ROUND_UP(width, CHECKSUM_TILE_WIDTH);
106         unsigned tile_count_y = DIV_ROUND_UP(height, CHECKSUM_TILE_HEIGHT);
107 
108         slice->crc.stride = tile_count_x * CHECKSUM_BYTES_PER_TILE;
109 
110         return slice->crc.stride * tile_count_y;
111 }
112 
113 unsigned
panfrost_get_layer_stride(const struct pan_image_layout * layout,unsigned level)114 panfrost_get_layer_stride(const struct pan_image_layout *layout,
115                           unsigned level)
116 {
117         if (layout->dim != MALI_TEXTURE_DIMENSION_3D)
118                 return layout->array_stride;
119         else if (drm_is_afbc(layout->modifier))
120                 return layout->slices[level].afbc.surface_stride;
121         else
122                 return layout->slices[level].surface_stride;
123 }
124 
125 /* Computes the offset into a texture at a particular level/face. Add to
126  * the base address of a texture to get the address to that level/face */
127 
128 unsigned
panfrost_texture_offset(const struct pan_image_layout * layout,unsigned level,unsigned array_idx,unsigned surface_idx)129 panfrost_texture_offset(const struct pan_image_layout *layout,
130                         unsigned level, unsigned array_idx,
131                         unsigned surface_idx)
132 {
133         return layout->slices[level].offset +
134                (array_idx * layout->array_stride) +
135                (surface_idx * layout->slices[level].surface_stride);
136 }
137 
138 bool
pan_image_layout_init(const struct panfrost_device * dev,struct pan_image_layout * layout,uint64_t modifier,enum pipe_format format,enum mali_texture_dimension dim,unsigned width,unsigned height,unsigned depth,unsigned array_size,unsigned nr_samples,unsigned nr_slices,enum pan_image_crc_mode crc_mode,const struct pan_image_explicit_layout * explicit_layout)139 pan_image_layout_init(const struct panfrost_device *dev,
140                       struct pan_image_layout *layout,
141                       uint64_t modifier,
142                       enum pipe_format format,
143                       enum mali_texture_dimension dim,
144                       unsigned width, unsigned height, unsigned depth,
145                       unsigned array_size, unsigned nr_samples,
146                       unsigned nr_slices, enum pan_image_crc_mode crc_mode,
147                       const struct pan_image_explicit_layout *explicit_layout)
148 {
149         /* Explicit stride only work with non-mipmap, non-array; single-sample
150          * 2D image, and in-band CRC can't be used.
151          */
152         if (explicit_layout &&
153 	    (depth > 1 || nr_samples > 1 || array_size > 1 ||
154              dim != MALI_TEXTURE_DIMENSION_2D || nr_slices > 1 ||
155              crc_mode == PAN_IMAGE_CRC_INBAND))
156                 return false;
157 
158         /* Mandate 64 byte alignement */
159         if (explicit_layout && (explicit_layout->offset & 63))
160                 return false;
161 
162         layout->crc_mode = crc_mode;
163         layout->modifier = modifier;
164         layout->format = format;
165         layout->dim = dim;
166         layout->width = width;
167         layout->height = height;
168         layout->depth = depth;
169         layout->array_size = array_size;
170         layout->nr_samples = nr_samples;
171         layout->nr_slices = nr_slices;
172 
173         unsigned bytes_per_pixel = util_format_get_blocksize(format);
174 
175         /* MSAA is implemented as a 3D texture with z corresponding to the
176          * sample #, horrifyingly enough */
177 
178         assert(depth == 1 || nr_samples == 1);
179 
180         bool afbc = drm_is_afbc(layout->modifier);
181         bool tiled = layout->modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED;
182         bool linear = layout->modifier == DRM_FORMAT_MOD_LINEAR;
183         bool should_align = tiled || afbc;
184         bool is_3d = layout->dim == MALI_TEXTURE_DIMENSION_3D;
185 
186         unsigned oob_crc_offset = 0;
187         unsigned offset = explicit_layout ? explicit_layout->offset : 0;
188         unsigned tile_h = 1, tile_w = 1, tile_shift = 0;
189 
190         if (tiled || afbc) {
191                 tile_w = panfrost_block_dim(layout->modifier, true, 0);
192                 tile_h = panfrost_block_dim(layout->modifier, false, 0);
193                 if (util_format_is_compressed(format))
194                         tile_shift = 2;
195         }
196 
197         for (unsigned l = 0; l < nr_slices; ++l) {
198                 struct pan_image_slice_layout *slice = &layout->slices[l];
199 
200                 unsigned effective_width = width;
201                 unsigned effective_height = height;
202                 unsigned effective_depth = depth;
203 
204                 if (should_align) {
205                         effective_width = ALIGN_POT(effective_width, tile_w) >> tile_shift;
206                         effective_height = ALIGN_POT(effective_height, tile_h);
207 
208                         /* We don't need to align depth */
209                 }
210 
211                 /* Align levels to cache-line as a performance improvement for
212                  * linear/tiled and as a requirement for AFBC */
213 
214                 offset = ALIGN_POT(offset, 64);
215 
216                 slice->offset = offset;
217 
218                 /* Compute the would-be stride */
219                 unsigned stride = bytes_per_pixel * effective_width;
220 
221                 if (explicit_layout) {
222                         /* Make sure the explicit stride is valid */
223                         if (explicit_layout->line_stride < stride)
224                                 return false;
225 
226                         stride = explicit_layout->line_stride;
227                 } else if (linear) {
228                         /* Keep lines alignment on 64 byte for performance */
229                         stride = ALIGN_POT(stride, 64);
230                 }
231 
232                 slice->line_stride = stride;
233                 slice->row_stride = stride * (tile_h >> tile_shift);
234 
235                 unsigned slice_one_size = slice->line_stride * effective_height;
236 
237                 /* Compute AFBC sizes if necessary */
238                 if (afbc) {
239                         slice->afbc.header_size =
240                                 panfrost_afbc_header_size(width, height);
241 
242                         /* Stride between two rows of AFBC headers */
243                         slice->afbc.row_stride =
244                                 (effective_width / tile_w) *
245                                 AFBC_HEADER_BYTES_PER_TILE;
246 
247                         /* AFBC body size */
248                         slice->afbc.body_size = slice_one_size;
249 
250                         /* 3D AFBC resources have all headers placed at the
251                          * beginning instead of having them split per depth
252                          * level
253                          */
254                         if (is_3d) {
255                                 slice->afbc.surface_stride =
256                                         slice->afbc.header_size;
257                                 slice->afbc.header_size *= effective_depth;
258                                 slice->afbc.body_size *= effective_depth;
259                                 offset += slice->afbc.header_size;
260                         } else {
261                                 slice_one_size += slice->afbc.header_size;
262                                 slice->afbc.surface_stride = slice_one_size;
263                         }
264                 }
265 
266                 unsigned slice_full_size =
267                         slice_one_size * effective_depth * nr_samples;
268 
269                 slice->surface_stride = slice_one_size;
270 
271                 /* Compute AFBC sizes if necessary */
272 
273                 offset += slice_full_size;
274                 slice->size = slice_full_size;
275 
276                 /* Add a checksum region if necessary */
277                 if (crc_mode != PAN_IMAGE_CRC_NONE) {
278                         slice->crc.size =
279                                 panfrost_compute_checksum_size(slice, width, height);
280 
281                         if (crc_mode == PAN_IMAGE_CRC_INBAND) {
282                                 slice->crc.offset = offset;
283                                 offset += slice->crc.size;
284                                 slice->size += slice->crc.size;
285                         } else {
286                                 slice->crc.offset = oob_crc_offset;
287                                 oob_crc_offset += slice->crc.size;
288                         }
289                 }
290 
291                 width = u_minify(width, 1);
292                 height = u_minify(height, 1);
293                 depth = u_minify(depth, 1);
294         }
295 
296         /* Arrays and cubemaps have the entire miptree duplicated */
297         layout->array_stride = ALIGN_POT(offset, 64);
298         if (explicit_layout)
299                 layout->data_size = offset;
300         else
301                 layout->data_size = ALIGN_POT(layout->array_stride * array_size, 4096);
302         layout->crc_size = oob_crc_offset;
303 
304         return true;
305 }
306 
307 void
pan_iview_get_surface(const struct pan_image_view * iview,unsigned level,unsigned layer,unsigned sample,struct pan_surface * surf)308 pan_iview_get_surface(const struct pan_image_view *iview,
309                       unsigned level, unsigned layer, unsigned sample,
310                       struct pan_surface *surf)
311 {
312         level += iview->first_level;
313         assert(level < iview->image->layout.nr_slices);
314 
315        layer += iview->first_layer;
316 
317         bool is_3d = iview->image->layout.dim == MALI_TEXTURE_DIMENSION_3D;
318         const struct pan_image_slice_layout *slice = &iview->image->layout.slices[level];
319         mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
320 
321         if (drm_is_afbc(iview->image->layout.modifier)) {
322                 assert(!sample);
323 
324                 if (is_3d) {
325                         ASSERTED unsigned depth = u_minify(iview->image->layout.depth, level);
326                         assert(layer < depth);
327                         surf->afbc.header = base + slice->offset +
328                                            (layer * slice->afbc.surface_stride);
329                         surf->afbc.body = base + slice->offset +
330                                           slice->afbc.header_size +
331                                           (slice->surface_stride * layer);
332                 } else {
333                         assert(layer < iview->image->layout.array_size);
334                         surf->afbc.header = base +
335                                             panfrost_texture_offset(&iview->image->layout,
336                                                                     level, layer, 0);
337                         surf->afbc.body = surf->afbc.header + slice->afbc.header_size;
338                 }
339         } else {
340                 unsigned array_idx = is_3d ? 0 : layer;
341                 unsigned surface_idx = is_3d ? layer : sample;
342 
343                 surf->data = base +
344                              panfrost_texture_offset(&iview->image->layout, level,
345                                                      array_idx, surface_idx);
346         }
347 }
348 
349 #else /* ifndef PAN_ARCH */
350 
351 #if PAN_ARCH >= 5
352 /* Arm Scalable Texture Compression (ASTC) corresponds to just a few formats.
353  * The block dimension is not part of the format. Instead, it is encoded as a
354  * 6-bit tag on the payload pointer. Map the block size for a single dimension.
355  */
356 
357 static inline enum mali_astc_2d_dimension
panfrost_astc_dim_2d(unsigned dim)358 panfrost_astc_dim_2d(unsigned dim)
359 {
360         switch (dim) {
361         case  4: return MALI_ASTC_2D_DIMENSION_4;
362         case  5: return MALI_ASTC_2D_DIMENSION_5;
363         case  6: return MALI_ASTC_2D_DIMENSION_6;
364         case  8: return MALI_ASTC_2D_DIMENSION_8;
365         case 10: return MALI_ASTC_2D_DIMENSION_10;
366         case 12: return MALI_ASTC_2D_DIMENSION_12;
367         default: unreachable("Invalid ASTC dimension");
368         }
369 }
370 
371 static inline enum mali_astc_3d_dimension
panfrost_astc_dim_3d(unsigned dim)372 panfrost_astc_dim_3d(unsigned dim)
373 {
374         switch (dim) {
375         case  3: return MALI_ASTC_3D_DIMENSION_3;
376         case  4: return MALI_ASTC_3D_DIMENSION_4;
377         case  5: return MALI_ASTC_3D_DIMENSION_5;
378         case  6: return MALI_ASTC_3D_DIMENSION_6;
379         default: unreachable("Invalid ASTC dimension");
380         }
381 }
382 
383 /* Texture addresses are tagged with information about compressed formats.
384  * AFBC uses a bit for whether the colorspace transform is enabled (RGB and
385  * RGBA only).
386  * For ASTC, this is a "stretch factor" encoding the block size. */
387 
388 static unsigned
panfrost_compression_tag(const struct util_format_description * desc,enum mali_texture_dimension dim,uint64_t modifier)389 panfrost_compression_tag(const struct util_format_description *desc,
390                          enum mali_texture_dimension dim,
391                          uint64_t modifier)
392 {
393         if (drm_is_afbc(modifier)) {
394                 unsigned flags = (modifier & AFBC_FORMAT_MOD_YTR) ?
395                                  MALI_AFBC_SURFACE_FLAG_YTR : 0;
396 
397 #if PAN_ARCH >= 6
398                 /* Prefetch enable */
399                 flags |= MALI_AFBC_SURFACE_FLAG_PREFETCH;
400 
401                 /* Wide blocks (> 16x16) */
402                 if (panfrost_block_dim(modifier, true, 0) > 16)
403                         flags |= MALI_AFBC_SURFACE_FLAG_WIDE_BLOCK;
404 
405                 /* Used to make sure AFBC headers don't point outside the AFBC
406                  * body. HW is using the AFBC surface stride to do this check,
407                  * which doesn't work for 3D textures because the surface
408                  * stride does not cover the body. Only supported on v7+.
409                  */
410 #endif
411 
412 #if PAN_ARCH >= 7
413                 if (dim != MALI_TEXTURE_DIMENSION_3D)
414                         flags |= MALI_AFBC_SURFACE_FLAG_CHECK_PAYLOAD_RANGE;
415 #endif
416 
417                 return flags;
418         } else if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
419                 if (desc->block.depth > 1) {
420                         return (panfrost_astc_dim_3d(desc->block.depth) << 4) |
421                                (panfrost_astc_dim_3d(desc->block.height) << 2) |
422                                 panfrost_astc_dim_3d(desc->block.width);
423                 } else {
424                         return (panfrost_astc_dim_2d(desc->block.height) << 3) |
425                                 panfrost_astc_dim_2d(desc->block.width);
426                 }
427         } else {
428                 return 0;
429         }
430 }
431 #endif
432 
/* Cubemaps have 6 faces as "layers" in between each actual layer, and this
 * needs fixing up. Splits a first/last layer range counted in faces into a
 * face range (written to first_face/last_face) and a range of actual layers
 * (first_layer/last_layer, updated in place). TODO: logic wrong in the
 * asserted out cases ... can they happen, perhaps from cubemap arrays? */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned first = *first_layer;
        const unsigned last = *last_layer;

        *first_face = first % 6;
        *last_face = last % 6;
        *first_layer = first / 6;
        *last_layer = last / 6;

        /* Either the range stays within one layer, or it runs from face 0
         * to face 5 */
        assert((*first_layer == *last_layer) ||
               (*first_face == 0 && *last_face == 5));
}
449 
/* Following the texture descriptor is a number of pointers. How many? One
 * per combination of level, layer, face and sample (a sample count of zero
 * counts as one), and twice that when manual strides are used. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0, last_face = 0;

        /* For cubes, split the layer range into faces and actual layers */
        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                                &first_layer, &last_layer);
        }

        unsigned count = (last_level - first_level) + 1;

        count *= (last_layer - first_layer) + 1;
        count *= (last_face - first_face) + 1;
        count *= MAX2(nr_samples, 1);

        return manual_stride ? count * 2 : count;
}
476 
477 /* Conservative estimate of the size of the texture payload a priori.
478  * Average case, size equal to the actual size. Worst case, off by 2x (if
479  * a manual stride is not needed on a linear texture). Returned value
480  * must be greater than or equal to the actual size, so it's safe to use
481  * as an allocation amount */
482 
483 unsigned
GENX(panfrost_estimate_texture_payload_size)484 GENX(panfrost_estimate_texture_payload_size)(const struct pan_image_view *iview)
485 {
486         /* Assume worst case */
487         unsigned manual_stride = PAN_ARCH >= 6 ||
488                                  (iview->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
489 
490         unsigned elements =
491                 panfrost_texture_num_elements(iview->first_level, iview->last_level,
492                                               iview->first_layer, iview->last_layer,
493                                               iview->image->layout.nr_samples,
494                                               iview->dim == MALI_TEXTURE_DIMENSION_CUBE,
495                                               manual_stride);
496 
497         return sizeof(mali_ptr) * elements;
498 }
499 
/* Cursor over the surfaces referenced by a texture payload. Level, face and
 * sample each have a current value plus the inclusive first/last bounds they
 * wrap around in. Layer only has a current value and an inclusive upper
 * bound; going past that bound ends the iteration. */
struct panfrost_surface_iter {
        unsigned layer;
        unsigned last_layer;

        unsigned level;
        unsigned first_level;
        unsigned last_level;

        unsigned face;
        unsigned first_face;
        unsigned last_face;

        unsigned sample;
        unsigned first_sample;
        unsigned last_sample;
};
506 
507 static void
panfrost_surface_iter_begin(struct panfrost_surface_iter * iter,unsigned first_layer,unsigned last_layer,unsigned first_level,unsigned last_level,unsigned first_face,unsigned last_face,unsigned nr_samples)508 panfrost_surface_iter_begin(struct panfrost_surface_iter *iter,
509                             unsigned first_layer, unsigned last_layer,
510                             unsigned first_level, unsigned last_level,
511                             unsigned first_face, unsigned last_face,
512                             unsigned nr_samples)
513 {
514         iter->layer = first_layer;
515         iter->last_layer = last_layer;
516         iter->level = iter->first_level = first_level;
517         iter->last_level = last_level;
518         iter->face = iter->first_face = first_face;
519         iter->last_face = last_face;
520         iter->sample = iter->first_sample = 0;
521         iter->last_sample = nr_samples - 1;
522 }
523 
524 static bool
panfrost_surface_iter_end(const struct panfrost_surface_iter * iter)525 panfrost_surface_iter_end(const struct panfrost_surface_iter *iter)
526 {
527         return iter->layer > iter->last_layer;
528 }
529 
530 static void
panfrost_surface_iter_next(struct panfrost_surface_iter * iter)531 panfrost_surface_iter_next(struct panfrost_surface_iter *iter)
532 {
533 #define INC_TEST(field) \
534         do { \
535                 if (iter->field++ < iter->last_ ## field) \
536                        return; \
537                 iter->field = iter->first_ ## field; \
538         } while (0)
539 
540         /* Ordering is different on v7: inner loop is iterating on levels */
541         if (PAN_ARCH >= 7)
542                 INC_TEST(level);
543 
544         INC_TEST(sample);
545         INC_TEST(face);
546 
547         if (PAN_ARCH < 7)
548                 INC_TEST(level);
549 
550         iter->layer++;
551 
552 #undef INC_TEST
553 }
554 
555 static void
panfrost_get_surface_strides(const struct pan_image_layout * layout,unsigned l,int32_t * row_stride,int32_t * surf_stride)556 panfrost_get_surface_strides(const struct pan_image_layout *layout,
557                              unsigned l,
558                              int32_t *row_stride, int32_t *surf_stride)
559 {
560         const struct pan_image_slice_layout *slice = &layout->slices[l];
561 
562         if (drm_is_afbc(layout->modifier)) {
563                 /* Pre v7 don't have a row stride field. This field is
564                  * repurposed as a Y offset which we don't use */
565                 *row_stride = PAN_ARCH < 7 ? 0 : slice->afbc.row_stride;
566                 *surf_stride = slice->afbc.surface_stride;
567         } else {
568                 *row_stride = slice->row_stride;
569                 *surf_stride = slice->surface_stride;
570         }
571 }
572 
573 static mali_ptr
panfrost_get_surface_pointer(const struct pan_image_layout * layout,enum mali_texture_dimension dim,mali_ptr base,unsigned l,unsigned w,unsigned f,unsigned s)574 panfrost_get_surface_pointer(const struct pan_image_layout *layout,
575                              enum mali_texture_dimension dim,
576                              mali_ptr base,
577                              unsigned l, unsigned w, unsigned f, unsigned s)
578 {
579         unsigned face_mult = dim == MALI_TEXTURE_DIMENSION_CUBE ? 6 : 1;
580         unsigned offset;
581 
582         if (layout->dim == MALI_TEXTURE_DIMENSION_3D) {
583                 assert(!f && !s);
584                 offset = layout->slices[l].offset +
585                          (w * panfrost_get_layer_stride(layout, l));
586         } else {
587                 offset = panfrost_texture_offset(layout, l, (w * face_mult) + f, s);
588         }
589 
590         return base + offset;
591 }
592 
593 static void
panfrost_emit_texture_payload(const struct pan_image_view * iview,enum pipe_format format,bool manual_stride,void * payload)594 panfrost_emit_texture_payload(const struct pan_image_view *iview,
595                               enum pipe_format format,
596                               bool manual_stride,
597                               void *payload)
598 {
599         const struct pan_image_layout *layout = &iview->image->layout;
600         ASSERTED const struct util_format_description *desc =
601                 util_format_description(format);
602 
603         mali_ptr base = iview->image->data.bo->ptr.gpu + iview->image->data.offset;
604 
605         if (iview->buf.size) {
606                 assert (iview->dim == MALI_TEXTURE_DIMENSION_1D);
607                 base += iview->buf.offset;
608         }
609 
610 #if PAN_ARCH >= 5
611         /* panfrost_compression_tag() wants the dimension of the resource, not the
612          * one of the image view (those might differ).
613          */
614         base |= panfrost_compression_tag(desc, layout->dim, layout->modifier);
615 #else
616         assert(!drm_is_afbc(layout->modifier) && "no AFBC on v4");
617         assert(desc->layout != UTIL_FORMAT_LAYOUT_ASTC && "no ASTC on v4");
618 #endif
619 
620         /* Inject the addresses in, interleaving array indices, mip levels,
621          * cube faces, and strides in that order */
622 
623         unsigned first_layer = iview->first_layer, last_layer = iview->last_layer;
624         unsigned nr_samples = layout->nr_samples;
625         unsigned first_face = 0, last_face = 0;
626 
627         if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
628                 panfrost_adjust_cube_dimensions(&first_face, &last_face,
629                                                 &first_layer, &last_layer);
630         }
631 
632         struct panfrost_surface_iter iter;
633 
634         for (panfrost_surface_iter_begin(&iter, first_layer, last_layer,
635                                          iview->first_level, iview->last_level,
636                                          first_face, last_face, nr_samples);
637              !panfrost_surface_iter_end(&iter);
638              panfrost_surface_iter_next(&iter)) {
639                 mali_ptr pointer =
640                         panfrost_get_surface_pointer(layout, iview->dim, base,
641                                                      iter.level, iter.layer,
642                                                      iter.face, iter.sample);
643 
644                 if (!manual_stride) {
645                         pan_pack(payload, SURFACE, cfg) {
646                                 cfg.pointer = pointer;
647                         }
648                         payload += pan_size(SURFACE);
649                 } else {
650                         pan_pack(payload, SURFACE_WITH_STRIDE, cfg) {
651                                 cfg.pointer = pointer;
652                                 panfrost_get_surface_strides(layout, iter.level,
653                                                              &cfg.row_stride,
654                                                              &cfg.surface_stride);
655                         }
656                         payload += pan_size(SURFACE_WITH_STRIDE);
657                 }
658         }
659 }
660 
661 /* Check if we need to set a custom stride by computing the "expected"
662  * stride and comparing it to what the user actually wants. Only applies
663  * to linear textures, since tiled/compressed textures have strict
664  * alignment requirements for their strides as it is */
665 
666 static bool
panfrost_needs_explicit_stride(const struct pan_image_view * iview)667 panfrost_needs_explicit_stride(const struct pan_image_view *iview)
668 {
669         /* Stride is explicit on Bifrost */
670         if (PAN_ARCH >= 6)
671                 return true;
672 
673         if (iview->image->layout.modifier != DRM_FORMAT_MOD_LINEAR)
674                 return false;
675 
676         unsigned bytes_per_block = util_format_get_blocksize(iview->format);
677         unsigned block_w = util_format_get_blockwidth(iview->format);
678 
679         for (unsigned l = iview->first_level; l <= iview->last_level; ++l) {
680                 unsigned actual = iview->image->layout.slices[l].line_stride;
681                 unsigned expected =
682                         DIV_ROUND_UP(u_minify(iview->image->layout.width, l), block_w) *
683                         bytes_per_block;
684 
685                 if (actual != expected)
686                         return true;
687         }
688 
689         return false;
690 }
691 
692 /* Map modifiers to mali_texture_layout for packing in a texture descriptor */
693 
694 static enum mali_texture_layout
panfrost_modifier_to_layout(uint64_t modifier)695 panfrost_modifier_to_layout(uint64_t modifier)
696 {
697         if (drm_is_afbc(modifier))
698                 return MALI_TEXTURE_LAYOUT_AFBC;
699         else if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
700                 return MALI_TEXTURE_LAYOUT_TILED;
701         else if (modifier == DRM_FORMAT_MOD_LINEAR)
702                 return MALI_TEXTURE_LAYOUT_LINEAR;
703         else
704                 unreachable("Invalid modifer");
705 }
706 
707 void
GENX(panfrost_new_texture)708 GENX(panfrost_new_texture)(const struct panfrost_device *dev,
709                            const struct pan_image_view *iview,
710                            void *out, const struct panfrost_ptr *payload)
711 {
712         const struct pan_image_layout *layout = &iview->image->layout;
713         enum pipe_format format = iview->format;
714         unsigned swizzle;
715 
716         if (PAN_ARCH == 7 && util_format_is_depth_or_stencil(format)) {
717                 /* v7 doesn't have an _RRRR component order, combine the
718                  * user swizzle with a .XXXX swizzle to emulate that.
719                  */
720                 static const unsigned char replicate_x[4] = {
721                         PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
722                         PIPE_SWIZZLE_X, PIPE_SWIZZLE_X,
723                 };
724                 unsigned char patched_swizzle[4];
725 
726                 util_format_compose_swizzles(replicate_x,
727                                              iview->swizzle,
728                                              patched_swizzle);
729                 swizzle = panfrost_translate_swizzle_4(patched_swizzle);
730         } else {
731                 swizzle = panfrost_translate_swizzle_4(iview->swizzle);
732         }
733 
734         bool manual_stride =
735                 panfrost_needs_explicit_stride(iview);
736 
737         panfrost_emit_texture_payload(iview, format,
738                                       manual_stride,
739                                       payload->cpu);
740 
741         unsigned array_size = iview->last_layer - iview->first_layer + 1;
742 
743         if (iview->dim == MALI_TEXTURE_DIMENSION_CUBE) {
744                 assert(iview->first_layer % 6 == 0);
745                 assert(iview->last_layer % 6 == 5);
746                 array_size /=  6;
747         }
748 
749         unsigned width;
750 
751         if (iview->buf.size) {
752                 assert(iview->dim == MALI_TEXTURE_DIMENSION_1D);
753                 assert(!iview->first_level && !iview->last_level);
754                 assert(!iview->first_layer && !iview->last_layer);
755                 assert(layout->nr_samples == 1);
756                 assert(layout->height == 1 && layout->depth == 1);
757                 assert(iview->buf.offset + iview->buf.size <= layout->width);
758                 width = iview->buf.size;
759         } else {
760                 width = u_minify(layout->width, iview->first_level);
761         }
762 
763         pan_pack(out, TEXTURE, cfg) {
764                 cfg.dimension = iview->dim;
765                 cfg.format = dev->formats[format].hw;
766                 cfg.width = width;
767                 cfg.height = u_minify(layout->height, iview->first_level);
768                 if (iview->dim == MALI_TEXTURE_DIMENSION_3D)
769                         cfg.depth = u_minify(layout->depth, iview->first_level);
770                 else
771                         cfg.sample_count = layout->nr_samples;
772                 cfg.swizzle = swizzle;
773                 cfg.texel_ordering =
774                         panfrost_modifier_to_layout(layout->modifier);
775                 cfg.levels = iview->last_level - iview->first_level + 1;
776                 cfg.array_size = array_size;
777 
778 #if PAN_ARCH >= 6
779                 cfg.surfaces = payload->gpu;
780 
781                 /* We specify API-level LOD clamps in the sampler descriptor
782                  * and use these clamps simply for bounds checking */
783                 cfg.minimum_lod = FIXED_16(0, false);
784                 cfg.maximum_lod = FIXED_16(cfg.levels - 1, false);
785 #else
786                 cfg.manual_stride = manual_stride;
787 #endif
788         }
789 }
790 #endif /* ifdef PAN_ARCH */
791