1 /*
2  * Copyright (C) 2021 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "genxml/gen_macros.h"
25 
26 #include "util/macros.h"
27 #include "compiler/shader_enums.h"
28 
29 #include "vk_util.h"
30 
31 #include "pan_cs.h"
32 #include "pan_encoder.h"
33 #include "pan_pool.h"
34 #include "pan_shader.h"
35 
36 #include "panvk_cs.h"
37 #include "panvk_private.h"
38 #include "panvk_varyings.h"
39 
40 static enum mali_mipmap_mode
panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)41 panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode)
42 {
43    switch (mode) {
44    case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
45    case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
46    default: unreachable("Invalid mipmap mode");
47    }
48 }
49 
50 static unsigned
panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)51 panvk_translate_sampler_address_mode(VkSamplerAddressMode mode)
52 {
53    switch (mode) {
54    case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT;
55    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
56    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
57    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
58    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
59    default: unreachable("Invalid wrap");
60    }
61 }
62 
63 static void
panvk_translate_sampler_border_color(const VkSamplerCreateInfo * pCreateInfo,uint32_t border_color[4])64 panvk_translate_sampler_border_color(const VkSamplerCreateInfo *pCreateInfo,
65                                      uint32_t border_color[4])
66 {
67    const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
68       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
69 
70    switch (pCreateInfo->borderColor) {
71    case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
72    case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
73       border_color[0] = border_color[1] = border_color[2] = fui(0.0);
74       border_color[3] =
75          pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ?
76          fui(1.0) : fui(0.0);
77       break;
78    case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
79    case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
80       border_color[0] = border_color[1] = border_color[2] = 0;
81       border_color[3] =
82          pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ?
83          1 : 0;
84       break;
85    case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
86       border_color[0] = border_color[1] = border_color[2] = border_color[3] = fui(1.0);
87       break;
88    case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
89       border_color[0] = border_color[1] = border_color[2] = border_color[3] = 1;
90       break;
91    case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
92    case VK_BORDER_COLOR_INT_CUSTOM_EXT:
93       memcpy(border_color, pBorderColor->customBorderColor.int32, sizeof(uint32_t) * 4);
94       break;
95    default:
96       unreachable("Invalid border color");
97    }
98 }
99 
100 static mali_pixel_format
panvk_varying_hw_format(const struct panvk_device * dev,const struct panvk_varyings_info * varyings,gl_shader_stage stage,unsigned idx)101 panvk_varying_hw_format(const struct panvk_device *dev,
102                         const struct panvk_varyings_info *varyings,
103                         gl_shader_stage stage, unsigned idx)
104 {
105    const struct panfrost_device *pdev = &dev->physical_device->pdev;
106    gl_varying_slot loc = varyings->stage[stage].loc[idx];
107    bool fs = stage == MESA_SHADER_FRAGMENT;
108 
109    switch (loc) {
110    case VARYING_SLOT_PNTC:
111    case VARYING_SLOT_PSIZ:
112 #if PAN_ARCH <= 6
113       return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
114 #else
115       return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
116 #endif
117    case VARYING_SLOT_POS:
118 #if PAN_ARCH <= 6
119       return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
120              panfrost_get_default_swizzle(4);
121 #else
122       return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) |
123              MALI_RGB_COMPONENT_ORDER_RGBA;
124 #endif
125    default:
126       assert(!panvk_varying_is_builtin(stage, loc));
127       if (varyings->varying[loc].format != PIPE_FORMAT_NONE)
128          return pdev->formats[varyings->varying[loc].format].hw;
129 #if PAN_ARCH >= 7
130       return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
131 #else
132       return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
133 #endif
134    }
135 }
136 
137 static void
panvk_emit_varying(const struct panvk_device * dev,const struct panvk_varyings_info * varyings,gl_shader_stage stage,unsigned idx,void * attrib)138 panvk_emit_varying(const struct panvk_device *dev,
139                    const struct panvk_varyings_info *varyings,
140                    gl_shader_stage stage, unsigned idx,
141                    void *attrib)
142 {
143    gl_varying_slot loc = varyings->stage[stage].loc[idx];
144    bool fs = stage == MESA_SHADER_FRAGMENT;
145 
146    pan_pack(attrib, ATTRIBUTE, cfg) {
147       if (!panvk_varying_is_builtin(stage, loc)) {
148          cfg.buffer_index = varyings->varying[loc].buf;
149          cfg.offset = varyings->varying[loc].offset;
150       } else {
151          cfg.buffer_index =
152             panvk_varying_buf_index(varyings,
153                                     panvk_varying_buf_id(fs, loc));
154       }
155       cfg.offset_enable = PAN_ARCH == 5;
156       cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx);
157    }
158 }
159 
160 void
panvk_per_arch(emit_varyings)161 panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
162                               const struct panvk_varyings_info *varyings,
163                               gl_shader_stage stage,
164                               void *descs)
165 {
166    struct mali_attribute_packed *attrib = descs;
167 
168    for (unsigned i = 0; i < varyings->stage[stage].count; i++)
169       panvk_emit_varying(dev, varyings, stage, i, attrib++);
170 }
171 
172 static void
panvk_emit_varying_buf(const struct panvk_varyings_info * varyings,enum panvk_varying_buf_id id,void * buf)173 panvk_emit_varying_buf(const struct panvk_varyings_info *varyings,
174                        enum panvk_varying_buf_id id, void *buf)
175 {
176    unsigned buf_idx = panvk_varying_buf_index(varyings, id);
177 
178    pan_pack(buf, ATTRIBUTE_BUFFER, cfg) {
179 #if PAN_ARCH == 5
180       enum mali_attribute_special special_id = panvk_varying_special_buf_id(id);
181       if (special_id) {
182          cfg.type = 0;
183          cfg.special = special_id;
184          continue;
185       }
186 #endif
187       unsigned offset = varyings->buf[buf_idx].address & 63;
188 
189       cfg.stride = varyings->buf[buf_idx].stride;
190       cfg.size = varyings->buf[buf_idx].size + offset;
191       cfg.pointer = varyings->buf[buf_idx].address & ~63ULL;
192    }
193 }
194 
195 void
panvk_per_arch(emit_varying_bufs)196 panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
197                                   void *descs)
198 {
199    struct mali_attribute_buffer_packed *buf = descs;
200 
201    for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
202       if (varyings->buf_mask & (1 << i))
203          panvk_emit_varying_buf(varyings, i, buf++);
204    }
205 }
206 
207 static void
panvk_emit_attrib_buf(const struct panvk_attribs_info * info,const struct panvk_draw_info * draw,const struct panvk_attrib_buf * bufs,unsigned buf_count,unsigned idx,void * desc)208 panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
209                       const struct panvk_draw_info *draw,
210                       const struct panvk_attrib_buf *bufs,
211                       unsigned buf_count,
212                       unsigned idx, void *desc)
213 {
214    const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];
215 
216 #if PAN_ARCH == 5
217    if (buf_info->special) {
218       switch (buf_info->special_id) {
219       case PAN_VERTEX_ID:
220          panfrost_vertex_id(draw->padded_vertex_count, desc,
221                             draw->instance_count > 1);
222          return;
223       case PAN_INSTANCE_ID:
224          panfrost_instance_id(draw->padded_vertex_count, desc,
225                               draw->instance_count > 1);
226          return;
227       default:
228          unreachable("Invalid attribute ID");
229       }
230    }
231 #endif
232 
233    assert(idx < buf_count);
234    const struct panvk_attrib_buf *buf = &bufs[idx];
235    mali_ptr addr = buf->address & ~63ULL;
236    unsigned size = buf->size + (buf->address & 63);
237    unsigned divisor =
238       draw->padded_vertex_count * buf_info->instance_divisor;
239 
240    /* TODO: support instanced arrays */
241    if (draw->instance_count <= 1) {
242       pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
243          cfg.type = MALI_ATTRIBUTE_TYPE_1D;
244          cfg.stride = buf_info->per_instance ? 0 : buf_info->stride;
245          cfg.pointer = addr;
246          cfg.size = size;
247       }
248    } else if (!buf_info->per_instance) {
249       pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
250          cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
251          cfg.divisor = draw->padded_vertex_count;
252          cfg.stride = buf_info->stride;
253          cfg.pointer = addr;
254          cfg.size = size;
255       }
256    } else if (!divisor) {
257       /* instance_divisor == 0 means all instances share the same value.
258        * Make it a 1D array with a zero stride.
259        */
260       pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
261          cfg.type = MALI_ATTRIBUTE_TYPE_1D;
262          cfg.stride = 0;
263          cfg.pointer = addr;
264          cfg.size = size;
265       }
266    } else if (util_is_power_of_two_or_zero(divisor)) {
267       pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
268          cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
269          cfg.stride = buf_info->stride;
270          cfg.pointer = addr;
271          cfg.size = size;
272          cfg.divisor_r = __builtin_ctz(divisor);
273       }
274    } else {
275       unsigned divisor_r = 0, divisor_e = 0;
276       unsigned divisor_num =
277          panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
278       pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
279          cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
280          cfg.stride = buf_info->stride;
281          cfg.pointer = addr;
282          cfg.size = size;
283          cfg.divisor_r = divisor_r;
284          cfg.divisor_e = divisor_e;
285       }
286 
287       desc += pan_size(ATTRIBUTE_BUFFER);
288       pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
289          cfg.divisor_numerator = divisor_num;
290          cfg.divisor = buf_info->instance_divisor;
291       }
292    }
293 }
294 
295 void
panvk_per_arch(emit_attrib_bufs)296 panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
297                                  const struct panvk_attrib_buf *bufs,
298                                  unsigned buf_count,
299                                  const struct panvk_draw_info *draw,
300                                  void *descs)
301 {
302    struct mali_attribute_buffer_packed *buf = descs;
303 
304    for (unsigned i = 0; i < info->buf_count; i++) {
305       panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf);
306       buf += 2;
307    }
308 }
309 
310 void
panvk_per_arch(emit_sampler)311 panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
312                              void *desc)
313 {
314    uint32_t border_color[4];
315 
316    panvk_translate_sampler_border_color(pCreateInfo, border_color);
317 
318    pan_pack(desc, SAMPLER, cfg) {
319       cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
320       cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
321       cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
322       cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;
323 
324       cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
325       cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
326       cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
327       cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
328       cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
329       cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
330       cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
331       cfg.border_color_r = border_color[0];
332       cfg.border_color_g = border_color[1];
333       cfg.border_color_b = border_color[2];
334       cfg.border_color_a = border_color[3];
335    }
336 }
337 
338 static void
panvk_emit_attrib(const struct panvk_device * dev,const struct panvk_draw_info * draw,const struct panvk_attribs_info * attribs,const struct panvk_attrib_buf * bufs,unsigned buf_count,unsigned idx,void * attrib)339 panvk_emit_attrib(const struct panvk_device *dev,
340                   const struct panvk_draw_info *draw,
341                   const struct panvk_attribs_info *attribs,
342                   const struct panvk_attrib_buf *bufs,
343                   unsigned buf_count,
344                   unsigned idx, void *attrib)
345 {
346    const struct panfrost_device *pdev = &dev->physical_device->pdev;
347    unsigned buf_idx = attribs->attrib[idx].buf;
348    const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx];
349 
350    pan_pack(attrib, ATTRIBUTE, cfg) {
351       cfg.buffer_index = buf_idx * 2;
352       cfg.offset = attribs->attrib[idx].offset +
353                    (bufs[buf_idx].address & 63);
354 
355       if (buf_info->per_instance)
356          cfg.offset += draw->first_instance * buf_info->stride;
357 
358       cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
359    }
360 }
361 
362 void
panvk_per_arch(emit_attribs)363 panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
364                              const struct panvk_draw_info *draw,
365                              const struct panvk_attribs_info *attribs,
366                              const struct panvk_attrib_buf *bufs,
367                              unsigned buf_count,
368                              void *descs)
369 {
370    struct mali_attribute_packed *attrib = descs;
371 
372    for (unsigned i = 0; i < attribs->attrib_count; i++)
373       panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++);
374 }
375 
376 void
panvk_per_arch(emit_ubo)377 panvk_per_arch(emit_ubo)(mali_ptr address, size_t size,  void *desc)
378 {
379    pan_pack(desc, UNIFORM_BUFFER, cfg) {
380       cfg.pointer = address;
381       cfg.entries = DIV_ROUND_UP(size, 16);
382    }
383 }
384 
385 void
panvk_per_arch(emit_ubos)386 panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
387                           const struct panvk_descriptor_state *state,
388                           void *descs)
389 {
390    struct mali_uniform_buffer_packed *ubos = descs;
391 
392    for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
393       const struct panvk_descriptor_set_layout *set_layout =
394          pipeline->layout->sets[i].layout;
395       const struct panvk_descriptor_set *set = state->sets[i];
396       unsigned offset = pipeline->layout->sets[i].ubo_offset;
397 
398       if (!set_layout)
399          continue;
400 
401       if (!set) {
402          memset(&ubos[offset], 0, set_layout->num_ubos * sizeof(*ubos));
403       } else {
404          memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
405       }
406    }
407 
408    unsigned offset = pipeline->layout->num_ubos;
409    for (unsigned i = 0; i < pipeline->layout->num_dyn_ubos; i++) {
410       const struct panvk_buffer_desc *bdesc = &state->dyn.ubos[i];
411       size_t size = (bdesc->size == VK_WHOLE_SIZE && bdesc->buffer) ?
412                     (bdesc->buffer->bo->size - bdesc->offset) :
413                     bdesc->size;
414       mali_ptr address = bdesc->buffer ? bdesc->buffer->bo->ptr.gpu + bdesc->offset : 0;
415 
416       if (size)
417          panvk_per_arch(emit_ubo)(address, size, &ubos[offset + i]);
418       else
419          memset(&ubos[offset + i], 0, sizeof(*ubos));
420    }
421 
422    for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
423       if (!pipeline->sysvals[i].ids.sysval_count)
424          continue;
425 
426       panvk_per_arch(emit_ubo)(pipeline->sysvals[i].ubo ? : state->sysvals[i],
427                                pipeline->sysvals[i].ids.sysval_count * 16,
428                                &ubos[pipeline->sysvals[i].ubo_idx]);
429    }
430 
431    if (pipeline->layout->push_constants.size) {
432       panvk_per_arch(emit_ubo)(state->push_constants,
433                                ALIGN_POT(pipeline->layout->push_constants.size, 16),
434                                &ubos[pipeline->layout->push_constants.ubo_idx]);
435    }
436 }
437 
438 void
panvk_per_arch(emit_vertex_job)439 panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
440                                 const struct panvk_draw_info *draw,
441                                 void *job)
442 {
443    void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
444 
445    memcpy(section, &draw->invocation, pan_size(INVOCATION));
446 
447    pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
448       cfg.job_task_split = 5;
449    }
450 
451    pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
452       cfg.draw_descriptor_is_64b = true;
453       cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
454       cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
455       cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs;
456       cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
457       cfg.varying_buffers = draw->varying_bufs;
458       cfg.thread_storage = draw->tls;
459       cfg.offset_start = draw->offset_start;
460       cfg.instance_size = draw->instance_count > 1 ?
461                           draw->padded_vertex_count : 1;
462       cfg.uniform_buffers = draw->ubos;
463       cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants;
464       cfg.textures = draw->textures;
465       cfg.samplers = draw->samplers;
466    }
467 }
468 
469 void
panvk_per_arch(emit_compute_job)470 panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline,
471                                  const struct panvk_dispatch_info *dispatch,
472                                  void *job)
473 {
474    panfrost_pack_work_groups_compute(pan_section_ptr(job, COMPUTE_JOB, INVOCATION),
475                                      dispatch->wg_count.x,
476                                      dispatch->wg_count.y,
477                                      dispatch->wg_count.z,
478                                      pipeline->cs.local_size.x,
479                                      pipeline->cs.local_size.y,
480                                      pipeline->cs.local_size.z,
481                                      false, false);
482 
483    pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
484       cfg.job_task_split =
485          util_logbase2_ceil(pipeline->cs.local_size.x + 1) +
486          util_logbase2_ceil(pipeline->cs.local_size.y + 1) +
487          util_logbase2_ceil(pipeline->cs.local_size.z + 1);
488    }
489 
490    pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
491       cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE];
492       cfg.attributes = dispatch->attributes;
493       cfg.attribute_buffers = dispatch->attribute_bufs;
494       cfg.thread_storage = dispatch->tsd;
495       cfg.uniform_buffers = dispatch->ubos;
496       cfg.push_uniforms = dispatch->push_uniforms;
497       cfg.textures = dispatch->textures;
498       cfg.samplers = dispatch->samplers;
499    }
500 }
501 
502 static void
panvk_emit_tiler_primitive(const struct panvk_pipeline * pipeline,const struct panvk_draw_info * draw,void * prim)503 panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
504                            const struct panvk_draw_info *draw,
505                            void *prim)
506 {
507    pan_pack(prim, PRIMITIVE, cfg) {
508       cfg.draw_mode = pipeline->ia.topology;
509       if (pipeline->ia.writes_point_size)
510          cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
511 
512       cfg.first_provoking_vertex = true;
513       if (pipeline->ia.primitive_restart)
514          cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
515       cfg.job_task_split = 6;
516 
517       if (draw->index_size) {
518          cfg.index_count = draw->index_count;
519          cfg.indices = draw->indices;
520          cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;
521 
522          switch (draw->index_size) {
523          case 32: cfg.index_type = MALI_INDEX_TYPE_UINT32; break;
524          case 16: cfg.index_type = MALI_INDEX_TYPE_UINT16; break;
525          case 8: cfg.index_type = MALI_INDEX_TYPE_UINT8; break;
526          default: unreachable("Invalid index size");
527          }
528       } else {
529          cfg.index_count = draw->vertex_count;
530          cfg.index_type = MALI_INDEX_TYPE_NONE;
531       }
532    }
533 }
534 
535 static void
panvk_emit_tiler_primitive_size(const struct panvk_pipeline * pipeline,const struct panvk_draw_info * draw,void * primsz)536 panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
537                                 const struct panvk_draw_info *draw,
538                                 void *primsz)
539 {
540    pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
541       if (pipeline->ia.writes_point_size) {
542          cfg.size_array = draw->psiz;
543       } else {
544          cfg.constant = draw->line_width;
545       }
546    }
547 }
548 
549 static void
panvk_emit_tiler_dcd(const struct panvk_pipeline * pipeline,const struct panvk_draw_info * draw,void * dcd)550 panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
551                      const struct panvk_draw_info *draw,
552                      void *dcd)
553 {
554    pan_pack(dcd, DRAW, cfg) {
555       cfg.front_face_ccw = pipeline->rast.front_ccw;
556       cfg.cull_front_face = pipeline->rast.cull_front_face;
557       cfg.cull_back_face = pipeline->rast.cull_back_face;
558       cfg.position = draw->position;
559       cfg.state = draw->fs_rsd;
560       cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
561       cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs;
562       cfg.viewport = draw->viewport;
563       cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
564       cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
565 #if PAN_ARCH == 5
566       cfg.fbd = draw->fb;
567 #else
568       cfg.thread_storage = draw->tls;
569 #endif
570 
571       /* For all primitives but lines DRAW.flat_shading_vertex must
572        * be set to 0 and the provoking vertex is selected with the
573        * PRIMITIVE.first_provoking_vertex field.
574        */
575       if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
576           pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
577           pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
578          /* The logic is inverted on bifrost. */
579 #if PAN_ARCH == 5
580          cfg.flat_shading_vertex = false;
581 #else
582          cfg.flat_shading_vertex = true;
583 #endif
584       }
585 
586       cfg.offset_start = draw->offset_start;
587       cfg.instance_size = draw->instance_count > 1 ?
588                          draw->padded_vertex_count : 1;
589       cfg.uniform_buffers = draw->ubos;
590       cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
591       cfg.textures = draw->textures;
592       cfg.samplers = draw->samplers;
593 
594       /* TODO: occlusion queries */
595    }
596 }
597 
598 void
panvk_per_arch(emit_tiler_job)599 panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
600                                const struct panvk_draw_info *draw,
601                                void *job)
602 {
603    void *section;
604 
605    section = pan_section_ptr(job, TILER_JOB, INVOCATION);
606    memcpy(section, &draw->invocation, pan_size(INVOCATION));
607 
608    section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
609    panvk_emit_tiler_primitive(pipeline, draw, section);
610 
611    section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
612    panvk_emit_tiler_primitive_size(pipeline, draw, section);
613 
614    section = pan_section_ptr(job, TILER_JOB, DRAW);
615    panvk_emit_tiler_dcd(pipeline, draw, section);
616 
617 #if PAN_ARCH >= 6
618    pan_section_pack(job, TILER_JOB, TILER, cfg) {
619       cfg.address = draw->tiler_ctx->bifrost;
620    }
621    pan_section_pack(job, TILER_JOB, PADDING, padding);
622 #endif
623 }
624 
625 void
panvk_per_arch(emit_viewport)626 panvk_per_arch(emit_viewport)(const VkViewport *viewport,
627                               const VkRect2D *scissor,
628                               void *vpd)
629 {
630    /* The spec says "width must be greater than 0.0" */
631    assert(viewport->x >= 0);
632    int minx = (int)viewport->x;
633    int maxx = (int)(viewport->x + viewport->width);
634 
635    /* Viewport height can be negative */
636    int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
637    int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
638 
639    assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
640    miny = MAX2(scissor->offset.x, minx);
641    miny = MAX2(scissor->offset.y, miny);
642    maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
643    maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
644 
645    /* Make sure we don't end up with a max < min when width/height is 0 */
646    maxx = maxx > minx ? maxx - 1 : maxx;
647    maxy = maxy > miny ? maxy - 1 : maxy;
648 
649    assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
650    assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
651 
652    pan_pack(vpd, VIEWPORT, cfg) {
653       cfg.scissor_minimum_x = minx;
654       cfg.scissor_minimum_y = miny;
655       cfg.scissor_maximum_x = maxx;
656       cfg.scissor_maximum_y = maxy;
657       cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
658       cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
659    }
660 }
661 
#if PAN_ARCH >= 6
/* Translate a NIR ALU type into the Bifrost blend register file format.
 * A zero nir_type means the render target is not in use. */
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
   switch (nir_type) {
   case 0: /* Render target not in use */
      return 0;
   case nir_type_float16:
      return MALI_REGISTER_FILE_FORMAT_F16;
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_int16:
      return MALI_REGISTER_FILE_FORMAT_I16;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   case nir_type_uint16:
      return MALI_REGISTER_FILE_FORMAT_U16;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   default:
      unreachable("Unsupported blend shader type for NIR alu type");
   }
}
#endif
686 
687 void
panvk_per_arch(emit_blend)688 panvk_per_arch(emit_blend)(const struct panvk_device *dev,
689                            const struct panvk_pipeline *pipeline,
690                            unsigned rt, void *bd)
691 {
692    const struct pan_blend_state *blend = &pipeline->blend.state;
693    const struct pan_blend_rt_state *rts = &blend->rts[rt];
694    bool dithered = false;
695 
696    pan_pack(bd, BLEND, cfg) {
697       if (!blend->rt_count || !rts->equation.color_mask) {
698          cfg.enable = false;
699 #if PAN_ARCH >= 6
700          cfg.internal.mode = MALI_BLEND_MODE_OFF;
701 #endif
702          continue;
703       }
704 
705       cfg.srgb = util_format_is_srgb(rts->format);
706       cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
707       cfg.round_to_fb_precision = !dithered;
708 
709 #if PAN_ARCH <= 5
710       cfg.blend_shader = false;
711       pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
712                                            &cfg.equation);
713       cfg.constant =
714          pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
715                                 blend->constants);
716 #else
717       const struct panfrost_device *pdev = &dev->physical_device->pdev;
718       const struct util_format_description *format_desc =
719          util_format_description(rts->format);
720       unsigned chan_size = 0;
721       for (unsigned i = 0; i < format_desc->nr_channels; i++)
722          chan_size = MAX2(format_desc->channel[i].size, chan_size);
723 
724       pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
725                                            &cfg.equation);
726 
727       /* Fixed point constant */
728       float fconst =
729          pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
730                                 blend->constants);
731       u16 constant = fconst * ((1 << chan_size) - 1);
732       constant <<= 16 - chan_size;
733       cfg.constant = constant;
734 
735       if (pan_blend_is_opaque(blend->rts[rt].equation)) {
736          cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
737       } else {
738          cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION;
739 
740          cfg.internal.fixed_function.alpha_zero_nop =
741                  pan_blend_alpha_zero_nop(blend->rts[rt].equation);
742          cfg.internal.fixed_function.alpha_one_store =
743                  pan_blend_alpha_one_store(blend->rts[rt].equation);
744       }
745 
746       /* If we want the conversion to work properly,
747        * num_comps must be set to 4
748        */
749       cfg.internal.fixed_function.num_comps = 4;
750       cfg.internal.fixed_function.conversion.memory_format =
751          panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
752       cfg.internal.fixed_function.conversion.register_format =
753          bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
754       cfg.internal.fixed_function.rt = rt;
755 #endif
756    }
757 }
758 
759 void
panvk_per_arch(emit_blend_constant)760 panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
761                                     const struct panvk_pipeline *pipeline,
762                                     unsigned rt, const float *constants,
763                                     void *bd)
764 {
765    float constant = constants[pipeline->blend.constant[rt].index];
766 
767    pan_pack(bd, BLEND, cfg) {
768       cfg.enable = false;
769 #if PAN_ARCH == 5
770       cfg.constant = constant;
771 #else
772       cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
773 #endif
774    }
775 }
776 
777 void
panvk_per_arch(emit_dyn_fs_rsd)778 panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
779                                 const struct panvk_cmd_state *state,
780                                 void *rsd)
781 {
782    pan_pack(rsd, RENDERER_STATE, cfg) {
783       if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
784          cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
785          cfg.depth_factor = state->rast.depth_bias.slope_factor;
786          cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
787       }
788 
789       if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
790          cfg.stencil_front.mask = state->zs.s_front.compare_mask;
791          cfg.stencil_back.mask = state->zs.s_back.compare_mask;
792       }
793 
794       if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
795          cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
796          cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
797       }
798 
799       if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
800          cfg.stencil_front.reference_value = state->zs.s_front.ref;
801          cfg.stencil_back.reference_value = state->zs.s_back.ref;
802       }
803    }
804 }
805 
/* Emit the static (pipeline-creation-time) part of the fragment
 * RENDERER_STATE descriptor. Fields that can be overridden by an enabled
 * Vulkan dynamic state are deliberately left unwritten here (see the
 * dynamic_state_mask checks below) and are filled in at draw time by
 * panvk_per_arch(emit_dyn_fs_rsd)(). 'dev' is currently unused — kept
 * for signature symmetry with the other emit helpers. */
void
panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
                                 const struct panvk_pipeline *pipeline,
                                 void *rsd)
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->fs.required) {
         /* Common shader fields (entry point address, etc.) come straight
          * from the compiled shader info. */
         pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);

#if PAN_ARCH == 5
         /* If either depth or stencil is enabled, discard matters */
         bool zs_enabled =
            (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) ||
            pipeline->zs.s_test;

         cfg.properties.work_register_count = info->work_reg_count;
         /* Force early-Z only when the compiler says it is safe and
          * neither alpha-to-coverage nor a non-trivial depth test can
          * invalidate the early result. */
         cfg.properties.force_early_z =
            info->fs.can_early_z && !pipeline->ms.alpha_to_coverage &&
            pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS;


         /* Workaround a hardware errata where early-z cannot be enabled
          * when discarding even when the depth buffer is read-only, by
          * lying to the hardware about the discard and setting the
          * reads tilebuffer? flag to compensate */
         cfg.properties.shader_reads_tilebuffer =
            info->fs.outputs_read ||
            (!zs_enabled && info->fs.can_discard);
         cfg.properties.shader_contains_discard =
            zs_enabled && info->fs.can_discard;
#else
         /* Allow Forward Pixel Kill only when the shader writes every
          * render target enabled in the pipeline (no bit of rt_mask
          * outside rt_written) and nothing else — alpha-to-coverage or
          * destination-reading blending — depends on killed pixels. */
         uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
                 pipeline->fs.info.fs.can_fpk &&
                 !(rt_mask & ~rt_written) &&
                 !pipeline->ms.alpha_to_coverage &&
                 !pipeline->blend.reads_dest;
#endif
      } else {
         /* No fragment shader bound: emit a trivial fixed-function-only
          * renderer state. */
#if PAN_ARCH == 5
         /* NOTE(review): 0x1 appears to be the placeholder "no shader"
          * address on Midgard — confirm against pan_shader/genxml docs. */
         cfg.shader.shader = 0x1;
         cfg.properties.work_register_count = 1;
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.force_early_z = true;
#else
         cfg.properties.shader_modifies_coverage = true;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
#endif
      }

      bool msaa = pipeline->ms.rast_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      /* With MSAA disabled, keep all sample-mask bits set. */
      cfg.multisample_misc.sample_mask =
         msaa ? pipeline->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
      /* Near/far plane discard is turned off when depth clamping is on. */
      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.front_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.back_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;

      /* Static values for the dynamic-state-capable fields: only written
       * when the corresponding Vulkan dynamic state is NOT enabled;
       * otherwise emit_dyn_fs_rsd() provides them at draw time. */
      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
      }

      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
   }
}
912 
/* Emit a RENDERER_STATE descriptor for a non-fragment shader stage.
 * Such stages only need the common shader fields prepared from the
 * compiled shader info; fragment shaders carry extra ZS/blend state and
 * must go through panvk_per_arch(emit_base_fs_rsd)() instead.
 * 'dev' is unused — presumably kept for signature symmetry with the
 * other emit helpers. */
void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
                                const struct pan_shader_info *shader_info,
                                mali_ptr shader_ptr,
                                void *rsd)
{
   assert(shader_info->stage != MESA_SHADER_FRAGMENT);

   pan_pack(rsd, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
   }
}
925 
926 void
panvk_per_arch(emit_tiler_context)927 panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
928                                    unsigned width, unsigned height,
929                                    const struct panfrost_ptr *descs)
930 {
931 #if PAN_ARCH == 5
932    unreachable("Not supported on v5");
933 #else
934    const struct panfrost_device *pdev = &dev->physical_device->pdev;
935 
936    pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
937       cfg.size = pdev->tiler_heap->size;
938       cfg.base = pdev->tiler_heap->ptr.gpu;
939       cfg.bottom = pdev->tiler_heap->ptr.gpu;
940       cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
941    }
942 
943    pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
944       cfg.hierarchy_mask = 0x28;
945       cfg.fb_width = width;
946       cfg.fb_height = height;
947       cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
948    }
949 #endif
950 }
951