1 /*
2  * Copyright (C) 2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  *   Boris Brezillon <boris.brezillon@collabora.com>
26  */
27 
28 #include "util/macros.h"
29 
30 #include "panfrost-quirks.h"
31 
32 #include "pan_cs.h"
33 #include "pan_encoder.h"
34 #include "pan_texture.h"
35 
36 static unsigned
mod_to_block_fmt(uint64_t mod)37 mod_to_block_fmt(uint64_t mod)
38 {
39         switch (mod) {
40         case DRM_FORMAT_MOD_LINEAR:
41                 return MALI_BLOCK_FORMAT_LINEAR;
42 	case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
43                 return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
44         default:
45 #if PAN_ARCH >= 5
46                 if (drm_is_afbc(mod))
47                         return MALI_BLOCK_FORMAT_AFBC;
48 #endif
49 
50                 unreachable("Unsupported modifer");
51         }
52 }
53 
54 static enum mali_msaa
mali_sampling_mode(const struct pan_image_view * view)55 mali_sampling_mode(const struct pan_image_view *view)
56 {
57         if (view->image->layout.nr_samples > 1) {
58                 assert(view->nr_samples == view->image->layout.nr_samples);
59                 assert(view->image->layout.slices[0].surface_stride != 0);
60                 return MALI_MSAA_LAYERED;
61         }
62 
63         if (view->nr_samples > view->image->layout.nr_samples) {
64                 assert(view->image->layout.nr_samples == 1);
65                 return MALI_MSAA_AVERAGE;
66         }
67 
68         assert(view->nr_samples == view->image->layout.nr_samples);
69         assert(view->nr_samples == 1);
70 
71         return MALI_MSAA_SINGLE;
72 }
73 
74 static inline enum mali_sample_pattern
pan_sample_pattern(unsigned samples)75 pan_sample_pattern(unsigned samples)
76 {
77         switch (samples) {
78         case 1:  return MALI_SAMPLE_PATTERN_SINGLE_SAMPLED;
79         case 4:  return MALI_SAMPLE_PATTERN_ROTATED_4X_GRID;
80         case 8:  return MALI_SAMPLE_PATTERN_D3D_8X_GRID;
81         case 16: return MALI_SAMPLE_PATTERN_D3D_16X_GRID;
82         default: unreachable("Unsupported sample count");
83         }
84 }
85 
86 int
GENX(pan_select_crc_rt)87 GENX(pan_select_crc_rt)(const struct pan_fb_info *fb)
88 {
89 #if PAN_ARCH <= 6
90         if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
91             fb->rts[0].view->image->layout.crc_mode != PAN_IMAGE_CRC_NONE)
92                 return 0;
93 
94         return -1;
95 #else
96         bool best_rt_valid = false;
97         int best_rt = -1;
98 
99         for (unsigned i = 0; i < fb->rt_count; i++) {
100 		if (!fb->rts[i].view || fb->rts[0].discard ||
101                     fb->rts[i].view->image->layout.crc_mode == PAN_IMAGE_CRC_NONE)
102                         continue;
103 
104                 bool valid = *(fb->rts[i].crc_valid);
105                 bool full = !fb->extent.minx && !fb->extent.miny &&
106                             fb->extent.maxx == (fb->width - 1) &&
107                             fb->extent.maxy == (fb->height - 1);
108                 if (!full && !valid)
109                         continue;
110 
111                 if (best_rt < 0 || (valid && !best_rt_valid)) {
112                         best_rt = i;
113                         best_rt_valid = valid;
114                 }
115 
116                 if (valid)
117                         break;
118         }
119 
120         return best_rt;
121 #endif
122 }
123 
124 static enum mali_zs_format
translate_zs_format(enum pipe_format in)125 translate_zs_format(enum pipe_format in)
126 {
127         switch (in) {
128         case PIPE_FORMAT_Z16_UNORM: return MALI_ZS_FORMAT_D16;
129         case PIPE_FORMAT_Z24_UNORM_S8_UINT: return MALI_ZS_FORMAT_D24S8;
130         case PIPE_FORMAT_Z24X8_UNORM: return MALI_ZS_FORMAT_D24X8;
131         case PIPE_FORMAT_Z32_FLOAT: return MALI_ZS_FORMAT_D32;
132         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return MALI_ZS_FORMAT_D32_S8X24;
133         default: unreachable("Unsupported depth/stencil format.");
134         }
135 }
136 
137 #if PAN_ARCH >= 5
138 static enum mali_s_format
translate_s_format(enum pipe_format in)139 translate_s_format(enum pipe_format in)
140 {
141         switch (in) {
142         case PIPE_FORMAT_S8_UINT: return MALI_S_FORMAT_S8;
143         case PIPE_FORMAT_S8_UINT_Z24_UNORM:
144         case PIPE_FORMAT_S8X24_UINT:
145                 return MALI_S_FORMAT_S8X24;
146         case PIPE_FORMAT_Z24_UNORM_S8_UINT:
147         case PIPE_FORMAT_X24S8_UINT:
148                 return MALI_S_FORMAT_X24S8;
149         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
150                 return MALI_S_FORMAT_X32_S8X24;
151         default:
152                 unreachable("Unsupported stencil format.");
153         }
154 }
155 
156 static void
pan_prepare_s(const struct pan_fb_info * fb,struct MALI_ZS_CRC_EXTENSION * ext)157 pan_prepare_s(const struct pan_fb_info *fb,
158               struct MALI_ZS_CRC_EXTENSION *ext)
159 {
160         const struct pan_image_view *s = fb->zs.view.s;
161 
162         if (!s)
163                 return;
164 
165         unsigned level = s->first_level;
166 
167         ext->s_msaa = mali_sampling_mode(s);
168 
169         struct pan_surface surf;
170         pan_iview_get_surface(s, 0, 0, 0, &surf);
171 
172         assert(s->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
173                s->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
174         ext->s_writeback_base = surf.data;
175         ext->s_writeback_row_stride = s->image->layout.slices[level].row_stride;
176         ext->s_writeback_surface_stride =
177                 (s->image->layout.nr_samples > 1) ?
178                 s->image->layout.slices[level].surface_stride : 0;
179         ext->s_block_format = mod_to_block_fmt(s->image->layout.modifier);
180         ext->s_write_format = translate_s_format(s->format);
181 }
182 
183 static void
pan_prepare_zs(const struct pan_fb_info * fb,struct MALI_ZS_CRC_EXTENSION * ext)184 pan_prepare_zs(const struct pan_fb_info *fb,
185                struct MALI_ZS_CRC_EXTENSION *ext)
186 {
187         const struct pan_image_view *zs = fb->zs.view.zs;
188 
189         if (!zs)
190                 return;
191 
192         unsigned level = zs->first_level;
193 
194         ext->zs_msaa = mali_sampling_mode(zs);
195 
196         struct pan_surface surf;
197         pan_iview_get_surface(zs, 0, 0, 0, &surf);
198 
199         if (drm_is_afbc(zs->image->layout.modifier)) {
200 #if PAN_ARCH >= 6
201                 const struct pan_image_slice_layout *slice = &zs->image->layout.slices[level];
202 
203                 ext->zs_afbc_row_stride = slice->afbc.row_stride /
204                                           AFBC_HEADER_BYTES_PER_TILE;
205 #else
206                 ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
207                 ext->zs_afbc_body_size = 0x1000;
208                 ext->zs_afbc_chunk_size = 9;
209                 ext->zs_afbc_sparse = true;
210 #endif
211 
212                 ext->zs_afbc_header = surf.afbc.header;
213                 ext->zs_afbc_body = surf.afbc.body;
214         } else {
215                 assert(zs->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
216                        zs->image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
217 
218                 /* TODO: Z32F(S8) support, which is always linear */
219 
220                 ext->zs_writeback_base = surf.data;
221                 ext->zs_writeback_row_stride =
222                         zs->image->layout.slices[level].row_stride;
223                 ext->zs_writeback_surface_stride =
224                         (zs->image->layout.nr_samples > 1) ?
225                         zs->image->layout.slices[level].surface_stride : 0;
226         }
227 
228         ext->zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
229         ext->zs_write_format = translate_zs_format(zs->format);
230         if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
231                 ext->s_writeback_base = ext->zs_writeback_base;
232 }
233 
234 static void
pan_prepare_crc(const struct pan_fb_info * fb,int rt_crc,struct MALI_ZS_CRC_EXTENSION * ext)235 pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
236                 struct MALI_ZS_CRC_EXTENSION *ext)
237 {
238         if (rt_crc < 0)
239                 return;
240 
241         assert(rt_crc < fb->rt_count);
242 
243         const struct pan_image_view *rt = fb->rts[rt_crc].view;
244         const struct pan_image_slice_layout *slice = &rt->image->layout.slices[rt->first_level];
245         ext->crc_base = (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND ?
246                          (rt->image->data.bo->ptr.gpu + rt->image->data.offset) :
247                          (rt->image->crc.bo->ptr.gpu + rt->image->crc.offset)) +
248                         slice->crc.offset;
249         ext->crc_row_stride = slice->crc.stride;
250 
251 #if PAN_ARCH >= 7
252         ext->crc_render_target = rt_crc;
253 
254         if (fb->rts[rt_crc].clear) {
255                 uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
256                 ext->crc_clear_color = clear_val | 0xc000000000000000 |
257                                        (((uint64_t)clear_val & 0xffff) << 32);
258         }
259 #endif
260 }
261 
262 static void
pan_emit_zs_crc_ext(const struct pan_fb_info * fb,int rt_crc,void * zs_crc_ext)263 pan_emit_zs_crc_ext(const struct pan_fb_info *fb, int rt_crc,
264                     void *zs_crc_ext)
265 {
266         pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
267                 pan_prepare_crc(fb, rt_crc, &cfg);
268                 cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
269                 pan_prepare_zs(fb, &cfg);
270                 pan_prepare_s(fb, &cfg);
271         }
272 }
273 
274 /* Measure format as it appears in the tile buffer */
275 
276 static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)277 pan_bytes_per_pixel_tib(enum pipe_format format)
278 {
279         if (panfrost_blendable_formats_v7[format].internal) {
280                 /* Blendable formats are always 32-bits in the tile buffer,
281                  * extra bits are used as padding or to dither */
282                 return 4;
283         } else {
284                 /* Non-blendable formats are raw, rounded up to the nearest
285                  * power-of-two size */
286                 unsigned bytes = util_format_get_blocksize(format);
287                 return util_next_power_of_two(bytes);
288         }
289 }
290 
291 static unsigned
pan_internal_cbuf_size(const struct pan_fb_info * fb,unsigned * tile_size)292 pan_internal_cbuf_size(const struct pan_fb_info *fb,
293                        unsigned *tile_size)
294 {
295         unsigned total_size = 0;
296 
297         *tile_size = 16 * 16;
298         for (int cb = 0; cb < fb->rt_count; ++cb) {
299                 const struct pan_image_view *rt = fb->rts[cb].view;
300 
301                 if (!rt)
302                         continue;
303 
304                 total_size += pan_bytes_per_pixel_tib(rt->format) *
305                               rt->nr_samples * (*tile_size);
306         }
307 
308         /* We have a 4KB budget, let's reduce the tile size until it fits. */
309         while (total_size > 4096) {
310                 total_size >>= 1;
311                 *tile_size >>= 1;
312         }
313 
314         /* Align on 1k. */
315         total_size = ALIGN_POT(total_size, 1024);
316 
317         /* Minimum tile size is 4x4. */
318         assert(*tile_size >= 4 * 4);
319         return total_size;
320 }
321 
322 static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)323 pan_mfbd_raw_format(unsigned bits)
324 {
325         switch (bits) {
326         case    8: return MALI_COLOR_FORMAT_RAW8;
327         case   16: return MALI_COLOR_FORMAT_RAW16;
328         case   24: return MALI_COLOR_FORMAT_RAW24;
329         case   32: return MALI_COLOR_FORMAT_RAW32;
330         case   48: return MALI_COLOR_FORMAT_RAW48;
331         case   64: return MALI_COLOR_FORMAT_RAW64;
332         case   96: return MALI_COLOR_FORMAT_RAW96;
333         case  128: return MALI_COLOR_FORMAT_RAW128;
334         case  192: return MALI_COLOR_FORMAT_RAW192;
335         case  256: return MALI_COLOR_FORMAT_RAW256;
336         case  384: return MALI_COLOR_FORMAT_RAW384;
337         case  512: return MALI_COLOR_FORMAT_RAW512;
338         case  768: return MALI_COLOR_FORMAT_RAW768;
339         case 1024: return MALI_COLOR_FORMAT_RAW1024;
340         case 1536: return MALI_COLOR_FORMAT_RAW1536;
341         case 2048: return MALI_COLOR_FORMAT_RAW2048;
342         default: unreachable("invalid raw bpp");
343         }
344 }
345 
346 static void
pan_rt_init_format(const struct pan_image_view * rt,struct MALI_RENDER_TARGET * cfg)347 pan_rt_init_format(const struct pan_image_view *rt,
348                    struct MALI_RENDER_TARGET *cfg)
349 {
350         /* Explode details on the format */
351 
352         const struct util_format_description *desc =
353                 util_format_description(rt->format);
354 
355         /* The swizzle for rendering is inverted from texturing */
356 
357         unsigned char swizzle[4] = {
358                 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
359         };
360 
361         /* Fill in accordingly, defaulting to 8-bit UNORM */
362 
363         if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
364                 cfg->srgb = true;
365 
366         struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
367 
368         if (fmt.internal) {
369                 cfg->internal_format = fmt.internal;
370                 cfg->writeback_format = fmt.writeback;
371                 panfrost_invert_swizzle(desc->swizzle, swizzle);
372         } else {
373                 /* Construct RAW internal/writeback, where internal is
374                  * specified logarithmically (round to next power-of-two).
375                  * Offset specified from RAW8, where 8 = 2^3 */
376 
377                 unsigned bits = desc->block.bits;
378                 unsigned offset = util_logbase2_ceil(bits) - 3;
379                 assert(offset <= 4);
380 
381                 cfg->internal_format =
382                         MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
383 
384                 cfg->writeback_format = pan_mfbd_raw_format(bits);
385         }
386 
387         cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
388 }
389 
390 static void
pan_prepare_rt(const struct pan_fb_info * fb,unsigned idx,unsigned cbuf_offset,struct MALI_RENDER_TARGET * cfg)391 pan_prepare_rt(const struct pan_fb_info *fb, unsigned idx,
392                unsigned cbuf_offset,
393                struct MALI_RENDER_TARGET *cfg)
394 {
395         cfg->clean_pixel_write_enable = fb->rts[idx].clear;
396         cfg->internal_buffer_offset = cbuf_offset;
397         if (fb->rts[idx].clear) {
398                 cfg->clear.color_0 = fb->rts[idx].clear_value[0];
399                 cfg->clear.color_1 = fb->rts[idx].clear_value[1];
400                 cfg->clear.color_2 = fb->rts[idx].clear_value[2];
401                 cfg->clear.color_3 = fb->rts[idx].clear_value[3];
402         }
403 
404         const struct pan_image_view *rt = fb->rts[idx].view;
405         if (!rt || fb->rts[idx].discard) {
406                 cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
407                 cfg->internal_buffer_offset = cbuf_offset;
408 #if PAN_ARCH >= 7
409                 cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
410                 cfg->dithering_enable = true;
411 #endif
412                 return;
413         }
414 
415         cfg->write_enable = true;
416         cfg->dithering_enable = true;
417 
418         unsigned level = rt->first_level;
419         assert(rt->last_level == rt->first_level);
420         assert(rt->last_layer == rt->first_layer);
421 
422         int row_stride = rt->image->layout.slices[level].row_stride;
423 
424         /* Only set layer_stride for layered MSAA rendering  */
425 
426         unsigned layer_stride =
427                 (rt->image->layout.nr_samples > 1) ?
428                         rt->image->layout.slices[level].surface_stride : 0;
429 
430         cfg->writeback_msaa = mali_sampling_mode(rt);
431 
432         pan_rt_init_format(rt, cfg);
433 
434 #if PAN_ARCH <= 5
435         cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
436 #else
437         cfg->writeback_block_format = mod_to_block_fmt(rt->image->layout.modifier);
438 #endif
439 
440         struct pan_surface surf;
441         pan_iview_get_surface(rt, 0, 0, 0, &surf);
442 
443         if (drm_is_afbc(rt->image->layout.modifier)) {
444                 const struct pan_image_slice_layout *slice = &rt->image->layout.slices[level];
445 
446 #if PAN_ARCH >= 6
447                 cfg->afbc.row_stride = slice->afbc.row_stride /
448                                        AFBC_HEADER_BYTES_PER_TILE;
449                 cfg->afbc.afbc_wide_block_enable =
450                         panfrost_block_dim(rt->image->layout.modifier, true, 0) > 16;
451 #else
452                 cfg->afbc.chunk_size = 9;
453                 cfg->afbc.sparse = true;
454                 cfg->afbc.body_size = slice->afbc.body_size;
455 #endif
456 
457                 cfg->afbc.header = surf.afbc.header;
458                 cfg->afbc.body = surf.afbc.body;
459 
460                 if (rt->image->layout.modifier & AFBC_FORMAT_MOD_YTR)
461                         cfg->afbc.yuv_transform_enable = true;
462         } else {
463                 assert(rt->image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
464                        rt->image->layout.modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
465                 cfg->rgb.base = surf.data;
466                 cfg->rgb.row_stride = row_stride;
467                 cfg->rgb.surface_stride = layer_stride;
468         }
469 }
470 #endif
471 
472 void
GENX(pan_emit_tls)473 GENX(pan_emit_tls)(const struct pan_tls_info *info,
474                    void *out)
475 {
476         pan_pack(out, LOCAL_STORAGE, cfg) {
477                 if (info->tls.size) {
478                         unsigned shift =
479                                 panfrost_get_stack_shift(info->tls.size);
480 
481                         cfg.tls_size = shift;
482                         cfg.tls_base_pointer = info->tls.ptr;
483                 }
484 
485                 if (info->wls.size) {
486                         assert(!(info->wls.ptr & 4095));
487                         assert((info->wls.ptr & 0xffffffff00000000ULL) == ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
488                         cfg.wls_base_pointer = info->wls.ptr;
489                         unsigned wls_size = pan_wls_adjust_size(info->wls.size);
490                         cfg.wls_instances = pan_wls_instances(&info->wls.dim);
491                         cfg.wls_size_scale = util_logbase2(wls_size) + 1;
492                 } else {
493                         cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
494                 }
495         }
496 }
497 
498 #if PAN_ARCH <= 5
499 static void
pan_emit_midgard_tiler(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tiler_context * tiler_ctx,void * out)500 pan_emit_midgard_tiler(const struct panfrost_device *dev,
501                        const struct pan_fb_info *fb,
502                        const struct pan_tiler_context *tiler_ctx,
503                        void *out)
504 {
505         bool hierarchy = !(dev->quirks & MIDGARD_NO_HIER_TILING);
506 
507         assert(tiler_ctx->midgard.polygon_list->ptr.gpu);
508 
509         pan_pack(out, TILER_CONTEXT, cfg) {
510                 unsigned header_size;
511 
512                 if (tiler_ctx->midgard.disable) {
513                         cfg.hierarchy_mask =
514                                 hierarchy ?
515                                 MALI_MIDGARD_TILER_DISABLED :
516                                 MALI_MIDGARD_TILER_USER;
517                         header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
518                         cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
519                         cfg.heap_start = tiler_ctx->midgard.polygon_list->ptr.gpu;
520                         cfg.heap_end = tiler_ctx->midgard.polygon_list->ptr.gpu;
521 		} else {
522                         cfg.hierarchy_mask =
523                                 panfrost_choose_hierarchy_mask(fb->width,
524                                                                fb->height,
525                                                                1, hierarchy);
526                         header_size = panfrost_tiler_header_size(fb->width,
527                                                                  fb->height,
528                                                                  cfg.hierarchy_mask,
529                                                                  hierarchy);
530                         cfg.polygon_list_size =
531                                 panfrost_tiler_full_size(fb->width, fb->height,
532                                                          cfg.hierarchy_mask,
533                                                          hierarchy);
534                         cfg.heap_start = dev->tiler_heap->ptr.gpu;
535                         cfg.heap_end = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
536                 }
537 
538                 cfg.polygon_list = tiler_ctx->midgard.polygon_list->ptr.gpu;
539                 cfg.polygon_list_body = cfg.polygon_list + header_size;
540         }
541 }
542 #endif
543 
544 #if PAN_ARCH >= 5
545 static void
pan_emit_rt(const struct pan_fb_info * fb,unsigned idx,unsigned cbuf_offset,void * out)546 pan_emit_rt(const struct pan_fb_info *fb,
547             unsigned idx, unsigned cbuf_offset, void *out)
548 {
549         pan_pack(out, RENDER_TARGET, cfg) {
550                 pan_prepare_rt(fb, idx, cbuf_offset, &cfg);
551         }
552 }
553 
554 #if PAN_ARCH >= 6
555 /* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
556  *
557  *      Forcing clean_tile_writes breaks INTERSECT readbacks
558  *
559  * To workaround, use the frame shader mode ALWAYS instead of INTERSECT if
560  * clean tile writes is forced. Since INTERSECT is a hint that the hardware may
561  * ignore, this cannot affect correctness, only performance */
562 
563 static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,bool force_clean_tile)564 pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode, bool force_clean_tile)
565 {
566         if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
567                 return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
568         else
569                 return mode;
570 }
571 
572 /* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
573  * the effective tile size differs from the superblock size of any enabled AFBC
574  * render target. Check this condition. */
575 
576 static bool
pan_force_clean_write_rt(const struct pan_image_view * rt,unsigned tile_size)577 pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
578 {
579         if (!drm_is_afbc(rt->image->layout.modifier))
580                 return false;
581 
582         unsigned superblock = panfrost_block_dim(rt->image->layout.modifier, true, 0);
583 
584         assert(superblock >= 16);
585         assert(tile_size <= 16*16);
586 
587         /* Tile size and superblock differ unless they are both 16x16 */
588         return !(superblock == 16 && tile_size == 16*16);
589 }
590 
591 static bool
pan_force_clean_write(const struct pan_fb_info * fb,unsigned tile_size)592 pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
593 {
594         /* Maximum tile size */
595         assert(tile_size <= 16*16);
596 
597         for (unsigned i = 0; i < fb->rt_count; ++i) {
598                 if (fb->rts[i].view && !fb->rts[i].discard &&
599                     pan_force_clean_write_rt(fb->rts[i].view, tile_size))
600                         return true;
601         }
602 
603         if (fb->zs.view.zs && !fb->zs.discard.z &&
604             pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
605                 return true;
606 
607         if (fb->zs.view.s && !fb->zs.discard.s &&
608             pan_force_clean_write_rt(fb->zs.view.s, tile_size))
609                 return true;
610 
611         return false;
612 }
613 
614 #endif
615 
616 static unsigned
pan_emit_mfbd(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tls_info * tls,const struct pan_tiler_context * tiler_ctx,void * out)617 pan_emit_mfbd(const struct panfrost_device *dev,
618               const struct pan_fb_info *fb,
619               const struct pan_tls_info *tls,
620               const struct pan_tiler_context *tiler_ctx,
621               void *out)
622 {
623         unsigned tags = MALI_FBD_TAG_IS_MFBD;
624         void *fbd = out;
625         void *rtd = out + pan_size(FRAMEBUFFER);
626 
627 #if PAN_ARCH <= 5
628         GENX(pan_emit_tls)(tls,
629                            pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
630 #endif
631 
632         unsigned tile_size;
633         unsigned internal_cbuf_size = pan_internal_cbuf_size(fb, &tile_size);
634         int crc_rt = GENX(pan_select_crc_rt)(fb);
635         bool has_zs_crc_ext = pan_fbd_has_zs_crc_ext(fb);
636 
637         pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
638 #if PAN_ARCH >= 6
639                 bool force_clean_write = pan_force_clean_write(fb, tile_size);
640 
641                 cfg.sample_locations =
642                         panfrost_sample_positions(dev, pan_sample_pattern(fb->nr_samples));
643                 cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0], force_clean_write);
644                 cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1], force_clean_write);
645                 cfg.post_frame  = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2], force_clean_write);
646                 cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
647                 cfg.tiler = tiler_ctx->bifrost;
648 #endif
649                 cfg.width = fb->width;
650                 cfg.height = fb->height;
651                 cfg.bound_max_x = fb->width - 1;
652                 cfg.bound_max_y = fb->height - 1;
653 
654                 cfg.effective_tile_size = tile_size;
655                 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
656                 cfg.render_target_count = MAX2(fb->rt_count, 1);
657 
658                 /* Default to 24 bit depth if there's no surface. */
659                 cfg.z_internal_format =
660                         fb->zs.view.zs ?
661                         panfrost_get_z_internal_format(fb->zs.view.zs->format) :
662                         MALI_Z_INTERNAL_FORMAT_D24;
663 
664                 cfg.z_clear = fb->zs.clear_value.depth;
665                 cfg.s_clear = fb->zs.clear_value.stencil;
666                 cfg.color_buffer_allocation = internal_cbuf_size;
667                 cfg.sample_count = fb->nr_samples;
668                 cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
669                 cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
670                 cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
671                 cfg.has_zs_crc_extension = has_zs_crc_ext;
672 
673                 if (crc_rt >= 0) {
674                         bool *valid = fb->rts[crc_rt].crc_valid;
675                         bool full = !fb->extent.minx && !fb->extent.miny &&
676                                     fb->extent.maxx == (fb->width - 1) &&
677                                     fb->extent.maxy == (fb->height - 1);
678 
679                         cfg.crc_read_enable = *valid;
680 
681                         /* If the data is currently invalid, still write CRC
682                          * data if we are doing a full write, so that it is
683                          * valid for next time. */
684                         cfg.crc_write_enable = *valid || full;
685 
686                         *valid |= full;
687                 }
688         }
689 
690 #if PAN_ARCH >= 6
691         pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding);
692 #else
693         pan_emit_midgard_tiler(dev, fb, tiler_ctx,
694                                pan_section_ptr(fbd, FRAMEBUFFER, TILER));
695 
696         /* All weights set to 0, nothing to do here */
697         pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
698 #endif
699 
700         if (has_zs_crc_ext) {
701                 pan_emit_zs_crc_ext(fb, crc_rt,
702                                     out + pan_size(FRAMEBUFFER));
703                 rtd += pan_size(ZS_CRC_EXTENSION);
704                 tags |= MALI_FBD_TAG_HAS_ZS_RT;
705         }
706 
707         unsigned rt_count = MAX2(fb->rt_count, 1);
708         unsigned cbuf_offset = 0;
709         for (unsigned i = 0; i < rt_count; i++) {
710                 pan_emit_rt(fb, i, cbuf_offset, rtd);
711                 rtd += pan_size(RENDER_TARGET);
712                 if (!fb->rts[i].view)
713                         continue;
714 
715                 cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
716                                tile_size * fb->rts[i].view->image->layout.nr_samples;
717 
718                 if (i != crc_rt)
719                         *(fb->rts[i].crc_valid) = false;
720         }
721         tags |= MALI_POSITIVE(MAX2(fb->rt_count, 1)) << 2;
722 
723         return tags;
724 }
725 #else /* PAN_ARCH == 4 */
726 static void
pan_emit_sfbd_tiler(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tiler_context * ctx,void * fbd)727 pan_emit_sfbd_tiler(const struct panfrost_device *dev,
728                     const struct pan_fb_info *fb,
729                     const struct pan_tiler_context *ctx,
730                     void *fbd)
731 {
732        pan_emit_midgard_tiler(dev, fb, ctx,
733                               pan_section_ptr(fbd, FRAMEBUFFER, TILER));
734 
735         /* All weights set to 0, nothing to do here */
736         pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding);
737         pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w);
738 }
739 
740 static void
pan_emit_sfbd(const struct panfrost_device * dev,const struct pan_fb_info * fb,const struct pan_tls_info * tls,const struct pan_tiler_context * tiler_ctx,void * fbd)741 pan_emit_sfbd(const struct panfrost_device *dev,
742               const struct pan_fb_info *fb,
743               const struct pan_tls_info *tls,
744               const struct pan_tiler_context *tiler_ctx,
745               void *fbd)
746 {
747         GENX(pan_emit_tls)(tls,
748                            pan_section_ptr(fbd, FRAMEBUFFER,
749                                            LOCAL_STORAGE));
750         pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
751                 cfg.bound_max_x = fb->width - 1;
752                 cfg.bound_max_y = fb->height - 1;
753                 cfg.dithering_enable = true;
754                 cfg.clean_pixel_write_enable = true;
755                 cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
756                 if (fb->rts[0].clear) {
757                         cfg.clear_color_0 = fb->rts[0].clear_value[0];
758                         cfg.clear_color_1 = fb->rts[0].clear_value[1];
759                         cfg.clear_color_2 = fb->rts[0].clear_value[2];
760                         cfg.clear_color_3 = fb->rts[0].clear_value[3];
761                 }
762 
763                 if (fb->zs.clear.z)
764                         cfg.z_clear = fb->zs.clear_value.depth;
765 
766                 if (fb->zs.clear.s)
767                         cfg.s_clear = fb->zs.clear_value.stencil;
768 
769                 if (fb->rt_count && fb->rts[0].view) {
770                         const struct pan_image_view *rt = fb->rts[0].view;
771 
772                         const struct util_format_description *desc =
773                                 util_format_description(rt->format);
774 
775                         /* The swizzle for rendering is inverted from texturing */
776                         unsigned char swizzle[4];
777                         panfrost_invert_swizzle(desc->swizzle, swizzle);
778                         cfg.swizzle = panfrost_translate_swizzle_4(swizzle);
779 
780                         struct pan_blendable_format fmt = panfrost_blendable_formats_v7[rt->format];
781                         if (fmt.internal) {
782                                 cfg.internal_format = fmt.internal;
783                                 cfg.color_writeback_format = fmt.writeback;
784                         } else {
785                                 unreachable("raw formats not finished for SFBD");
786                         }
787 
788                         unsigned level = rt->first_level;
789                         struct pan_surface surf;
790 
791                         pan_iview_get_surface(rt, 0, 0, 0, &surf);
792 
793                         cfg.color_write_enable = !fb->rts[0].discard;
794                         cfg.color_writeback.base = surf.data;
795                         cfg.color_writeback.row_stride =
796 	                        rt->image->layout.slices[level].row_stride;
797 
798                         cfg.color_block_format = mod_to_block_fmt(rt->image->layout.modifier);
799                         assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
800                                cfg.color_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
801 
802                         if (rt->image->layout.crc_mode != PAN_IMAGE_CRC_NONE) {
803                                 const struct pan_image_slice_layout *slice =
804                                         &rt->image->layout.slices[level];
805 
806                                 cfg.crc_buffer.row_stride = slice->crc.stride;
807                                 if (rt->image->layout.crc_mode == PAN_IMAGE_CRC_INBAND) {
808                                         cfg.crc_buffer.base = rt->image->data.bo->ptr.gpu +
809                                                               rt->image->data.offset +
810                                                               slice->crc.offset;
811                                 } else {
812                                         cfg.crc_buffer.base = rt->image->crc.bo->ptr.gpu +
813                                                               rt->image->crc.offset +
814                                                               slice->crc.offset;
815                                 }
816                         }
817                 }
818 
819                 if (fb->zs.view.zs) {
820                         const struct pan_image_view *zs = fb->zs.view.zs;
821                         unsigned level = zs->first_level;
822                         struct pan_surface surf;
823 
824                         pan_iview_get_surface(zs, 0, 0, 0, &surf);
825 
826                         cfg.zs_write_enable = !fb->zs.discard.z;
827                         cfg.zs_writeback.base = surf.data;
828                         cfg.zs_writeback.row_stride =
829                                 zs->image->layout.slices[level].row_stride;
830                         cfg.zs_block_format = mod_to_block_fmt(zs->image->layout.modifier);
831                         assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
832                                cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);
833 
834                         cfg.zs_format = translate_zs_format(zs->format);
835                 }
836 
837                 cfg.sample_count = fb->nr_samples;
838 
839                 if (fb->rt_count)
840                         cfg.msaa = mali_sampling_mode(fb->rts[0].view);
841         }
842         pan_emit_sfbd_tiler(dev, fb, tiler_ctx, fbd);
843         pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding);
844 }
845 #endif
846 
847 unsigned
GENX(pan_emit_fbd)848 GENX(pan_emit_fbd)(const struct panfrost_device *dev,
849                    const struct pan_fb_info *fb,
850                    const struct pan_tls_info *tls,
851                    const struct pan_tiler_context *tiler_ctx,
852                    void *out)
853 {
854 #if PAN_ARCH == 4
855         assert(fb->rt_count <= 1);
856         pan_emit_sfbd(dev, fb, tls, tiler_ctx, out);
857         return 0;
858 #else
859         return pan_emit_mfbd(dev, fb, tls, tiler_ctx, out);
860 #endif
861 }
862 
863 #if PAN_ARCH >= 6
864 void
GENX(pan_emit_tiler_heap)865 GENX(pan_emit_tiler_heap)(const struct panfrost_device *dev,
866                           void *out)
867 {
868         pan_pack(out, TILER_HEAP, heap) {
869                 heap.size = dev->tiler_heap->size;
870                 heap.base = dev->tiler_heap->ptr.gpu;
871                 heap.bottom = dev->tiler_heap->ptr.gpu;
872                 heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
873         }
874 }
875 
876 void
GENX(pan_emit_tiler_ctx)877 GENX(pan_emit_tiler_ctx)(const struct panfrost_device *dev,
878                          unsigned fb_width, unsigned fb_height,
879                          unsigned nr_samples,
880                          mali_ptr heap,
881                          void *out)
882 {
883         unsigned max_levels = dev->tiler_features.max_levels;
884         assert(max_levels >= 2);
885 
886         pan_pack(out, TILER_CONTEXT, tiler) {
887                 /* TODO: Select hierarchy mask more effectively */
888                 tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
889                 tiler.fb_width = fb_width;
890                 tiler.fb_height = fb_height;
891                 tiler.heap = heap;
892                 tiler.sample_pattern = pan_sample_pattern(nr_samples);
893         }
894 }
895 #endif
896 
897 void
GENX(pan_emit_fragment_job)898 GENX(pan_emit_fragment_job)(const struct pan_fb_info *fb,
899                             mali_ptr fbd,
900                             void *out)
901 {
902         pan_section_pack(out, FRAGMENT_JOB, HEADER, header) {
903                 header.type = MALI_JOB_TYPE_FRAGMENT;
904                 header.index = 1;
905         }
906 
907         pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
908                 payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
909                 payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
910                 payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
911                 payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
912                 payload.framebuffer = fbd;
913 
914 #if PAN_ARCH >= 5
915                 if (fb->tile_map.base) {
916                         payload.has_tile_enable_map = true;
917                         payload.tile_enable_map = fb->tile_map.base;
918                         payload.tile_enable_map_row_stride = fb->tile_map.stride;
919                 }
920 #endif
921         }
922 }
923