/*
 * Copyright (C) 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "util/macros.h"
#include "compiler/shader_enums.h"

#include "vk_util.h"

#include "panfrost-quirks.h"
#include "pan_cs.h"
#include "pan_encoder.h"
#include "pan_pool.h"
#include "pan_shader.h"

#include "panvk_cs.h"
#include "panvk_private.h"
#include "panvk_varyings.h"

static enum mali_mipmap_mode
panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
{
   switch (mode) {
   case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
   case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
   default: unreachable("Invalid mipmap mode");
   }
}

static unsigned
panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
{
   switch (mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
   default: unreachable("Invalid wrap");
   }
}

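/* Translate the Vulkan border color into the four raw 32-bit values stored
 * in the sampler descriptor. Float colors are stored as IEEE-754 bit
 * patterns (fui()), integer colors as plain integers, and custom border
 * colors are copied verbatim from VkSamplerCustomBorderColorCreateInfoEXT. */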
static void
panvk_translate_sampler_border_color(const VkSamplerCreateInfo *pCreateInfo,
                                     uint32_t border_color[4])
{
   const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);

   switch (pCreateInfo->borderColor) {
   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
      border_color[0] = border_color[1] = border_color[2] = fui(0.0);
      border_color[3] =
         pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ?
         fui(1.0) : fui(0.0);
      break;
   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
      border_color[0] = border_color[1] = border_color[2] = 0;
      border_color[3] =
         pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
         UINT_MAX : 0;
      break;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
      border_color[0] = border_color[1] = border_color[2] = border_color[3] = fui(1.0);
      break;
   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
      border_color[0] = border_color[1] = border_color[2] = border_color[3] = UINT_MAX;
      break;
   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
      memcpy(border_color, pBorderColor->customBorderColor.int32, sizeof(uint32_t) * 4);
      break;
   default:
      unreachable("Invalid border color");
   }
}

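/* Pick the hardware format for a varying slot. The returned value packs the
 * Mali format in the upper bits and a swizzle (v6 and earlier) or component
 * order (v7+) in the low 12 bits. Built-ins like gl_PointSize and
 * gl_Position get fixed formats; other varyings use the pipe format
 * recorded in the varyings info, falling back to a constant (0, 0, 0, 0)
 * source when no format was assigned. */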
static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device *dev,
                        const struct panvk_varyings_info *varyings,
                        gl_shader_stage stage, unsigned idx)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;
   gl_varying_slot loc = varyings->stage[stage].loc[idx];
   bool fs = stage == MESA_SHADER_FRAGMENT;

   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
             panfrost_get_default_swizzle(4);
#else
      return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
             MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      assert(!panvk_varying_is_builtin(stage, loc));
      if (varyings->varying[loc].format != PIPE_FORMAT_NONE)
         return pdev->formats[varyings->varying[loc].format].hw;
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}

static void
panvk_emit_varying(const struct panvk_device *dev,
                   const struct panvk_varyings_info *varyings,
                   gl_shader_stage stage, unsigned idx,
                   void *attrib)
{
   gl_varying_slot loc = varyings->stage[stage].loc[idx];
   bool fs = stage == MESA_SHADER_FRAGMENT;

   pan_pack(attrib, ATTRIBUTE, cfg) {
      if (!panvk_varying_is_builtin(stage, loc)) {
         cfg.buffer_index = varyings->varying[loc].buf;
         cfg.offset = varyings->varying[loc].offset;
      } else {
         cfg.buffer_index =
            panvk_varying_buf_index(varyings,
                                    panvk_varying_buf_id(fs, loc));
      }
      cfg.offset_enable = PAN_ARCH == 5;
      cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
   }
}

void
panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
                              const struct panvk_varyings_info *varyings,
                              gl_shader_stage stage,
                              void *descs)
{
   struct mali_attribute_packed *attrib = descs;

   for (unsigned i = 0; i < varyings->stage[stage].count; i++)
      panvk_emit_varying(dev, varyings, stage, i, attrib++);
}

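/* Emit one ATTRIBUTE_BUFFER descriptor for a varying buffer. The base
 * pointer apparently has to be 64-byte aligned, so the low six address bits
 * are dropped from the pointer and added to the size so the descriptor still
 * covers the whole range. On v5, some varying buffers (point size, point
 * coord) are encoded as hardware "special" buffers with no backing memory. */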
static void
panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
                       enum panvk_varying_buf_id id, void *buf)
{
   unsigned buf_idx = panvk_varying_buf_index(varyings, id);

   pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
#if PAN_ARCH == 5
      enum mali_attribute_special special_id = panvk_varying_special_buf_id(id);
      if (special_id) {
         cfg.type = 0;
         cfg.special = special_id;
         continue;
      }
#endif
      unsigned offset = varyings->buf[buf_idx].address & 63;

      cfg.stride = varyings->buf[buf_idx].stride;
      cfg.size = varyings->buf[buf_idx].size + offset;
      cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
   }
}

void
panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
                                  void *descs)
{
   struct mali_attribute_buffer_packed *buf = descs;

   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
      if (varyings->buf_mask & (1 << i))
         panvk_emit_varying_buf(varyings, i, buf++);
   }

   /* We need an empty entry to stop prefetching on Bifrost */
#if PAN_ARCH >= 6
   memset(buf, 0, sizeof(*buf));
#endif
}

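/* Emit one ATTRIBUTE_BUFFER descriptor for a vertex attribute buffer. On v5,
 * gl_VertexID/gl_InstanceID are sourced from special buffers built with
 * panfrost_vertex_id()/panfrost_instance_id(). As with varying buffers, the
 * pointer is aligned down to 64 bytes and the size grown accordingly.
 * Per-instance data is described as a 1D_MODULUS buffer keyed on the padded
 * vertex count; full instanced-array support is still a TODO (see below). */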
static void
panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
                      const struct panvk_draw_info *draw,
                      const struct panvk_attrib_buf *bufs,
                      unsigned buf_count,
                      unsigned idx, void *desc)
{
   const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];

#if PAN_ARCH == 5
   if (buf_info->special) {
      switch (buf_info->special_id) {
      case PAN_VERTEX_ID:
         panfrost_vertex_id(draw->padded_vertex_count, desc,
                            draw->instance_count > 1);
         return;
      case PAN_INSTANCE_ID:
         panfrost_instance_id(draw->padded_vertex_count, desc,
                              draw->instance_count > 1);
         return;
      default:
         unreachable("Invalid attribute ID");
      }
   }
#endif

   assert(idx < buf_count);
   const struct panvk_attrib_buf *buf = &bufs[idx];
   unsigned divisor = buf_info->per_instance ?
                      draw->padded_vertex_count : 0;
   unsigned stride = divisor && draw->instance_count == 1 ?
                     0 : buf_info->stride;
   mali_ptr addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);

   /* TODO: support instanced arrays */
   pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
      if (draw->instance_count > 1 && divisor) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = divisor;
      }

      cfg.pointer = addr;
      cfg.stride = stride;
      cfg.size = size;
   }
}

void
panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
                                 const struct panvk_attrib_buf *bufs,
                                 unsigned buf_count,
                                 const struct panvk_draw_info *draw,
                                 void *descs)
{
   struct mali_attribute_buffer_packed *buf = descs;

   for (unsigned i = 0; i < info->buf_count; i++)
      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++);

   /* A NULL entry is needed to stop prefetching on Bifrost */
#if PAN_ARCH >= 6
   memset(buf, 0, sizeof(*buf));
#endif
}

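/* Fill a SAMPLER descriptor from a VkSamplerCreateInfo. LOD bias and clamps
 * are converted to the hardware's 16-bit fixed-point representation with
 * FIXED_16(); only the bias is allowed to be negative. */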
void
panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
                             void *desc)
{
   uint32_t border_color[4];

   panvk_translate_sampler_border_color(pCreateInfo, border_color);

   pan_pack(desc, SAMPLER, cfg) {
      cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
      cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
      cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
      cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;

      cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
      cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
      cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
      cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
      cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
      cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
      cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
      cfg.border_color_r = border_color[0];
      cfg.border_color_g = border_color[1];
      cfg.border_color_b = border_color[2];
      cfg.border_color_a = border_color[3];
   }
}

static void
panvk_emit_attrib(const struct panvk_device *dev,
                  const struct panvk_attribs_info *attribs,
                  const struct panvk_attrib_buf *bufs,
                  unsigned buf_count,
                  unsigned idx, void *attrib)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;

   pan_pack(attrib, ATTRIBUTE, cfg) {
      cfg.buffer_index = attribs->attrib[idx].buf;
      cfg.offset = attribs->attrib[idx].offset +
                   (bufs[cfg.buffer_index].address & 63);
      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
   }
}

void
panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
                             const struct panvk_attribs_info *attribs,
                             const struct panvk_attrib_buf *bufs,
                             unsigned buf_count,
                             void *descs)
{
   struct mali_attribute_packed *attrib = descs;

   for (unsigned i = 0; i < attribs->attrib_count; i++)
      panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
}

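/* Emit a single UNIFORM_BUFFER descriptor. The hardware counts UBO sizes in
 * 16-byte entries, so, for example, a 68-byte buffer is described as
 * DIV_ROUND_UP(68, 16) = 5 entries. */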
void
panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc)
{
   pan_pack(desc, UNIFORM_BUFFER, cfg) {
      cfg.pointer = address;
      cfg.entries = DIV_ROUND_UP(size, 16);
   }
}

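/* Emit the UBO descriptor table for a draw/dispatch: one descriptor per UBO
 * in each bound descriptor set (plus one for a set's dynamic offsets, when
 * it has any), followed by the sysval UBOs. Sets declared in the pipeline
 * layout but not currently bound get zero-filled descriptors so the table
 * layout still matches what the shaders expect. */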
void
panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
                          const struct panvk_descriptor_state *state,
                          void *descs)
{
   struct mali_uniform_buffer_packed *ubos = descs;

   for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
      const struct panvk_descriptor_set_layout *set_layout =
         pipeline->layout->sets[i].layout;
      const struct panvk_descriptor_set *set = state->sets[i].set;
      unsigned offset = pipeline->layout->sets[i].ubo_offset;

      if (!set_layout)
         continue;

      if (!set) {
         unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
         memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
      } else {
         memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
         if (set_layout->num_dynoffsets) {
            panvk_per_arch(emit_ubo)(state->sets[i].dynoffsets.gpu,
                                     set->layout->num_dynoffsets * sizeof(uint32_t),
                                     &ubos[offset + set_layout->num_ubos]);
         }
      }
   }

   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
      if (!pipeline->sysvals[i].ids.sysval_count)
         continue;

      panvk_per_arch(emit_ubo)(pipeline->sysvals[i].ubo ? : state->sysvals[i],
                               pipeline->sysvals[i].ids.sysval_count * 16,
                               &ubos[pipeline->sysvals[i].ubo_idx]);
   }
}

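/* Build the vertex shading job. Vertex shading is submitted as a
 * compute-style job: the invocation section is copied from the draw info,
 * and the DRAW section points at the vertex renderer state plus the
 * attribute/varying/UBO/texture/sampler descriptors gathered for this
 * draw. */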
void
panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
                                const struct panvk_draw_info *draw,
                                void *job)
{
   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);

   memcpy(section, &draw->invocation, pan_size(INVOCATION));

   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 5;
   }

   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
      cfg.draw_descriptor_is_64b = true;
      cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
      cfg.attribute_buffers = draw->attribute_bufs;
      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
      cfg.varying_buffers = draw->varying_bufs;
      cfg.thread_storage = draw->tls;
      cfg.offset_start = draw->offset_start;
      cfg.instance_size = draw->instance_count > 1 ?
                          draw->padded_vertex_count : 1;
      cfg.uniform_buffers = draw->ubos;
      cfg.push_uniforms = draw->stages[MESA_SHADER_VERTEX].push_constants;
      cfg.textures = draw->textures;
      cfg.samplers = draw->samplers;
   }
}

static void
panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
                           const struct panvk_draw_info *draw,
                           void *prim)
{
   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = pipeline->ia.topology;
      if (pipeline->ia.writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex = true;
      if (pipeline->ia.primitive_restart)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;
      /* TODO: indexed draws */
      cfg.index_count = draw->vertex_count;
   }
}

static void
panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
                                const struct panvk_draw_info *draw,
                                void *primsz)
{
   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
      if (pipeline->ia.writes_point_size) {
         cfg.size_array = draw->psiz;
      } else {
         cfg.constant = draw->line_width;
      }
   }
}

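/* Fill the DRAW (DCD) section of the tiler job: rasterizer state, the
 * fragment renderer state descriptor, and the varyings/positions produced
 * by the vertex job. On v5 the DCD references the framebuffer descriptor,
 * while on v6+ it references the thread storage descriptor instead. */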
static void
panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
                     const struct panvk_draw_info *draw,
                     void *dcd)
{
   pan_pack(dcd, DRAW, cfg) {
      cfg.four_components_per_vertex = true;
      cfg.draw_descriptor_is_64b = true;
      cfg.front_face_ccw = pipeline->rast.front_ccw;
      cfg.cull_front_face = pipeline->rast.cull_front_face;
      cfg.cull_back_face = pipeline->rast.cull_back_face;
      cfg.position = draw->position;
      cfg.state = draw->fs_rsd;
      cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
      cfg.attribute_buffers = draw->attribute_bufs;
      cfg.viewport = draw->viewport;
      cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
#if PAN_ARCH == 5
      cfg.fbd = draw->fb;
#else
      cfg.thread_storage = draw->tls;
#endif

      /* For all primitives but lines, DRAW.flat_shading_vertex must
       * be set to 0, and the provoking vertex is selected with the
       * PRIMITIVE.first_provoking_vertex field.
       */
      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
         /* The logic is inverted on Bifrost. */
#if PAN_ARCH == 5
         cfg.flat_shading_vertex = false;
#else
         cfg.flat_shading_vertex = true;
#endif
      }

      cfg.offset_start = draw->offset_start;
      cfg.instance_size = draw->instance_count > 1 ?
                          draw->padded_vertex_count : 1;
      cfg.uniform_buffers = draw->ubos;
      cfg.push_uniforms = draw->stages[MESA_SHADER_FRAGMENT].push_constants;
      cfg.textures = draw->textures;
      cfg.samplers = draw->samplers;

      /* TODO: occlusion queries */
   }
}

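/* Assemble a complete TILER_JOB from the sections emitted above. On v6+ the
 * job also references the per-batch tiler context and carries the PADDING
 * section required by the job layout. */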
void
panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
                               const struct panvk_draw_info *draw,
                               void *job)
{
   void *section;

   section = pan_section_ptr(job, TILER_JOB, INVOCATION);
   memcpy(section, &draw->invocation, pan_size(INVOCATION));

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
   panvk_emit_tiler_primitive(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
   panvk_emit_tiler_primitive_size(pipeline, draw, section);

   section = pan_section_ptr(job, TILER_JOB, DRAW);
   panvk_emit_tiler_dcd(pipeline, draw, section);

#if PAN_ARCH >= 6
   pan_section_pack(job, TILER_JOB, TILER, cfg) {
      cfg.address = draw->tiler_ctx->bifrost;
   }
   pan_section_pack(job, TILER_JOB, PADDING, padding);
#endif
}

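/* Convert a Vulkan viewport/scissor pair into a Mali VIEWPORT descriptor.
 * The hardware exposes a single scissor box, so the viewport rectangle and
 * the scissor are intersected here; the maximum coordinates are inclusive,
 * hence the -1 adjustments below. */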
void
panvk_per_arch(emit_viewport)(const VkViewport *viewport,
                              const VkRect2D *scissor,
                              void *vpd)
{
   /* The spec says "width must be greater than 0.0" */
   assert(viewport->x >= 0);
   int minx = (int)viewport->x;
   int maxx = (int)(viewport->x + viewport->width);

   /* Viewport height can be negative */
   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));

   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
   minx = MAX2(scissor->offset.x, minx);
   miny = MAX2(scissor->offset.y, miny);
   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);

   /* Make sure we don't end up with a max < min when width/height is 0 */
   maxx = maxx > minx ? maxx - 1 : maxx;
   maxy = maxy > miny ? maxy - 1 : maxy;

   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);

   pan_pack(vpd, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
   }
}

#if PAN_ARCH >= 6
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
   switch (nir_type) {
   case 0: /* Render target not in use */
      return 0;
   case nir_type_float16:
      return MALI_REGISTER_FILE_FORMAT_F16;
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   case nir_type_int16:
      return MALI_REGISTER_FILE_FORMAT_I16;
   case nir_type_uint16:
      return MALI_REGISTER_FILE_FORMAT_U16;
   default:
      unreachable("Unsupported blend shader type for NIR alu type");
   }
}
#endif

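/* Emit a BLEND descriptor for render target 'rt'. On v5 the blend constant
 * is a float, while on v6+ it is converted to fixed point scaled to the
 * widest channel of the render-target format: with 8-bit channels and a 0.5
 * constant, 0.5 * 255 = 127, shifted left by 16 - 8 = 8 bits, gives 0x7f00.
 * Dithering is currently hardcoded off. */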
void
panvk_per_arch(emit_blend)(const struct panvk_device *dev,
                           const struct panvk_pipeline *pipeline,
                           unsigned rt, void *bd)
{
   const struct pan_blend_state *blend = &pipeline->blend.state;
   const struct pan_blend_rt_state *rts = &blend->rts[rt];
   bool dithered = false;

   pan_pack(bd, BLEND, cfg) {
      if (!blend->rt_count || !rts->equation.color_mask) {
         cfg.enable = false;
#if PAN_ARCH >= 6
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
         continue;
      }

      cfg.srgb = util_format_is_srgb(rts->format);
      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
      cfg.round_to_fb_precision = !dithered;

#if PAN_ARCH <= 5
      cfg.blend_shader = false;
      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);
      cfg.constant =
         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
                                blend->constants);
#else
      const struct panfrost_device *pdev = &dev->physical_device->pdev;
      const struct util_format_description *format_desc =
         util_format_description(rts->format);
      unsigned chan_size = 0;
      for (unsigned i = 0; i < format_desc->nr_channels; i++)
         chan_size = MAX2(format_desc->channel[i].size, chan_size);

      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
                                           &cfg.equation);

      /* Fixed-point constant */
      float fconst =
         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
                                blend->constants);
      uint16_t constant = fconst * ((1 << chan_size) - 1);
      constant <<= 16 - chan_size;
      cfg.constant = constant;

      if (pan_blend_is_opaque(blend->rts[rt].equation))
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
      else
         cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;

      /* If we want the conversion to work properly,
       * num_comps must be set to 4.
       */
      cfg.internal.fixed_function.num_comps = 4;
      cfg.internal.fixed_function.conversion.memory_format =
         panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
#endif
   }
}

void
panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
                                    const struct panvk_pipeline *pipeline,
                                    unsigned rt, const float *constants,
                                    void *bd)
{
   float constant = constants[pipeline->blend.constant[rt].index];

   pan_pack(bd, BLEND, cfg) {
      cfg.enable = false;
#if PAN_ARCH == 5
      cfg.constant = constant;
#else
      cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
#endif
   }
}

void
panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
                                const struct panvk_cmd_state *state,
                                void *rsd)
{
   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
         cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = state->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
         cfg.stencil_front.mask = state->zs.s_front.compare_mask;
         cfg.stencil_back.mask = state->zs.s_back.compare_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
         cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
      }

      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
         cfg.stencil_front.reference_value = state->zs.s_front.ref;
         cfg.stencil_back.reference_value = state->zs.s_back.ref;
      }
   }
}

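/* Emit the static part of the fragment RENDERER_STATE descriptor from the
 * pipeline state. Pipelines without a fragment shader get a trivial
 * fixed-function state instead (a dummy shader pointer on v5, early-ZS
 * pixel-kill settings on v6+). Any state marked dynamic in the pipeline is
 * skipped here and patched in later by emit_dyn_fs_rsd(). */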
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
                                 const struct panvk_pipeline *pipeline,
                                 void *rsd)
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->fs.required) {
         pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);

#if PAN_ARCH == 5
         /* If either depth or stencil is enabled, discard matters */
         bool zs_enabled =
            (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) ||
            pipeline->zs.s_test;

         cfg.properties.work_register_count = info->work_reg_count;
         cfg.properties.force_early_z =
            info->fs.can_early_z && !pipeline->ms.alpha_to_coverage &&
            pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS;

         /* Workaround a hardware errata where early-z cannot be enabled
          * when discarding even when the depth buffer is read-only, by
          * lying to the hardware about the discard and setting the
          * shader_reads_tilebuffer flag to compensate */
         cfg.properties.shader_reads_tilebuffer =
            info->fs.outputs_read ||
            (!zs_enabled && info->fs.can_discard);
         cfg.properties.shader_contains_discard =
            zs_enabled && info->fs.can_discard;
#else
         uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
                 pipeline->fs.info.fs.can_fpk &&
                 !(rt_mask & ~rt_written) &&
                 !pipeline->ms.alpha_to_coverage &&
                 !pipeline->blend.reads_dest;
#endif
      } else {
#if PAN_ARCH == 5
         cfg.shader.shader = 0x1;
         cfg.properties.work_register_count = 1;
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.force_early_z = true;
#else
         cfg.properties.shader_modifies_coverage = true;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
#endif
      }

      bool msaa = pipeline->ms.rast_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      cfg.multisample_misc.sample_mask =
         msaa ? pipeline->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
      }

      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
   }
}

void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
                                const struct pan_shader_info *shader_info,
                                mali_ptr shader_ptr,
                                void *rsd)
{
   assert(shader_info->stage != MESA_SHADER_FRAGMENT);

   pan_pack(rsd, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
   }
}

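/* Emit the tiler context used by tiler jobs (v6+ only): a TILER_CONTEXT
 * descriptor followed by the TILER_HEAP descriptor it points to, sized for
 * the current framebuffer. The hierarchy mask is currently hardcoded to
 * 0x28 rather than derived from the framebuffer dimensions. */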
void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
                                   unsigned width, unsigned height,
                                   const struct panfrost_ptr *descs)
{
#if PAN_ARCH == 5
   unreachable("Not supported on v5");
#else
   const struct panfrost_device *pdev = &dev->physical_device->pdev;

   pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
      cfg.size = pdev->tiler_heap->size;
      cfg.base = pdev->tiler_heap->ptr.gpu;
      cfg.bottom = pdev->tiler_heap->ptr.gpu;
      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
   }

   pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
      cfg.hierarchy_mask = 0x28;
      cfg.fb_width = width;
      cfg.fb_height = height;
      cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
   }
#endif
}