/*
 * Copyright © 2021 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

#include "vk_format_info.h"

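/* Translates a VkBlendFactor into the factor value expected by the BLEND_CFG
 * packet. Most Vulkan factors map directly (their enum values match the
 * V3D_BLEND_FACTOR encoding), the constant factors flag that blend constants
 * will be needed, and the DST_ALPHA factors collapse to ONE/ZERO when the
 * render target format has no alpha channel.
 */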
static uint8_t
blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
{
   switch (factor) {
   case VK_BLEND_FACTOR_ZERO:
   case VK_BLEND_FACTOR_ONE:
   case VK_BLEND_FACTOR_SRC_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
   case VK_BLEND_FACTOR_DST_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
   case VK_BLEND_FACTOR_SRC_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return factor;
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      *needs_constants = true;
      return factor;
   case VK_BLEND_FACTOR_DST_ALPHA:
      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
                             V3D_BLEND_FACTOR_DST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
   case VK_BLEND_FACTOR_SRC1_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
   case VK_BLEND_FACTOR_SRC1_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      assert(!"Invalid blend factor: dual source blending not supported.");
   default:
      assert(!"Unknown blend factor.");
   }

   /* Should be handled by the switch; added to avoid an "end of non-void
    * function" error.
    */
   unreachable("Unknown blend factor.");
}

static void
pack_blend(struct v3dv_pipeline *pipeline,
           const VkPipelineColorBlendStateCreateInfo *cb_info)
{
   /* By default, blending is disabled and writes to all color channels are
    * enabled. Color write enables are independent of whether blending is
    * enabled or not.
    *
    * Vulkan specifies color write masks so that bits set correspond to
    * enabled channels. Our hardware does it the other way around.
    */
   pipeline->blend.enables = 0;
   pipeline->blend.color_write_masks = 0; /* All channels enabled */

   if (!cb_info)
      return;

   assert(pipeline->subpass);
   if (pipeline->subpass->color_count == 0)
      return;

   assert(pipeline->subpass->color_count == cb_info->attachmentCount);

   pipeline->blend.needs_color_constants = false;
   uint32_t color_write_masks = 0;
   for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
      const VkPipelineColorBlendAttachmentState *b_state =
         &cb_info->pAttachments[i];

      uint32_t attachment_idx =
         pipeline->subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

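      /* 4 bits per render target; the hardware mask is inverted, so a set
       * bit disables writes to that channel.
       */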
      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);

      if (!b_state->blendEnable)
         continue;

      VkAttachmentDescription *desc =
         &pipeline->pass->attachments[attachment_idx].desc;
      const struct v3dv_format *format = v3dX(get_format)(desc->format);
      bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);

      uint8_t rt_mask = 1 << i;
      pipeline->blend.enables |= rt_mask;

      v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
         config.render_target_mask = rt_mask;

         config.color_blend_mode = b_state->colorBlendOp;
         config.color_blend_dst_factor =
            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);
         config.color_blend_src_factor =
            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);

         config.alpha_blend_mode = b_state->alphaBlendOp;
         config.alpha_blend_dst_factor =
            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);
         config.alpha_blend_src_factor =
            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);
      }
   }

   pipeline->blend.color_write_masks = color_write_masks;
}

/* This requires that pack_blend() has been called before, so we can set
 * the overall blend enable bit in the CFG_BITS packet.
 */
static void
pack_cfg_bits(struct v3dv_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *ds_info,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));

   pipeline->msaa =
      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
      config.enable_forward_facing_primitive =
         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;

      config.enable_reverse_facing_primitive =
         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;

      /* Seems like the hardware is backwards regarding this setting... */
      config.clockwise_primitives =
         rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;

      config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;

      /* This is required to pass line rasterization tests in CTS while
       * exposing, at least, a minimum of 4 bits of subpixel precision
       * (the minimum requirement).
       */
      config.line_rasterization = 1; /* perp end caps */

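      /* Non-FILL polygon modes are implemented on top of the Direct3D
       * wireframe rasterization mode: triangle edges are drawn as lines, or
       * as points when VK_POLYGON_MODE_POINT is selected.
       */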
      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
         config.direct3d_wireframe_triangles_mode = true;
         config.direct3d_point_fill_mode =
            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
      }

      config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;

      /* From the Vulkan spec:
       *
       *   "Provoking Vertex:
       *
       *       The vertex in a primitive from which flat shaded attribute
       *       values are taken. This is generally the “first” vertex in the
       *       primitive, and depends on the primitive topology."
       *
       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
       * the last vertex by default.
       */
      if (pv_info) {
         config.direct3d_provoking_vertex =
            pv_info->provokingVertexMode ==
               VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
      } else {
         config.direct3d_provoking_vertex = true;
      }

      config.blend_enable = pipeline->blend.enables != 0;

      /* Disable depth/stencil if we don't have a D/S attachment */
      bool has_ds_attachment =
         pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;

      if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
         config.z_updates_enable = ds_info->depthWriteEnable;
         config.depth_test_function = ds_info->depthCompareOp;
      } else {
         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
      }

      /* EZ state will be updated at draw time based on bound pipeline state */
      config.early_z_updates_enable = false;
      config.early_z_enable = false;

      config.stencil_enable =
         ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;

      pipeline->z_updates_enable = config.z_updates_enable;
   };
}

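/* Maps a VkStencilOp to the hardware stencil op encoding used by the
 * STENCIL_CFG packet.
 */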
static uint32_t
translate_stencil_op(VkStencilOp op)
{
   switch (op) {
   case VK_STENCIL_OP_KEEP:
      return V3D_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:
      return V3D_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:
      return V3D_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      return V3D_STENCIL_OP_INCR;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      return V3D_STENCIL_OP_DECR;
   case VK_STENCIL_OP_INVERT:
      return V3D_STENCIL_OP_INVERT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      return V3D_STENCIL_OP_INCWRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      return V3D_STENCIL_OP_DECWRAP;
   default:
      unreachable("bad stencil op");
   }
}

static void
pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
                        uint8_t *stencil_cfg,
                        bool is_front,
                        bool is_back,
                        const VkStencilOpState *stencil_state)
{
   /* From the Vulkan spec:
    *
    *   "Reference is an integer reference value that is used in the unsigned
    *    stencil comparison. The reference value used by stencil comparison
    *    must be within the range [0,2^s-1], where s is the number of bits in
    *    the stencil framebuffer attachment, otherwise the reference value is
    *    considered undefined."
    *
    * In our case, 's' is always 8, so we clamp to that to prevent our packing
    * functions from asserting in debug mode if they see larger values.
    *
    * If we have dynamic state we need to make sure we set the corresponding
    * state bits to 0, since cl_emit_with_prepacked ORs the new value with
    * the old.
    */
   const uint8_t write_mask =
      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
         0 : stencil_state->writeMask & 0xff;

   const uint8_t compare_mask =
      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
         0 : stencil_state->compareMask & 0xff;

   const uint8_t reference =
      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_REFERENCE ?
         0 : stencil_state->reference & 0xff;

   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
      config.front_config = is_front;
      config.back_config = is_back;
      config.stencil_write_mask = write_mask;
      config.stencil_test_mask = compare_mask;
      config.stencil_test_function = stencil_state->compareOp;
      config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
      config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
      config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
      config.stencil_ref_value = reference;
   }
}

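/* Packs the prepacked STENCIL_CFG state. When the front and back stencil
 * state match and none of it is dynamic we can emit a single packet that
 * configures both faces; otherwise we emit one packet per face.
 */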
static void
pack_stencil_cfg(struct v3dv_pipeline *pipeline,
                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
{
   assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));

   if (!ds_info || !ds_info->stencilTestEnable)
      return;

   if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
      return;

   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
                                           V3DV_DYNAMIC_STENCIL_REFERENCE;

   /* If front != back or we have dynamic stencil state we can't emit a single
    * packet for both faces.
    */
   bool needs_front_and_back = false;
   if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
       memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
      needs_front_and_back = true;

   /* If the front and back configurations are the same we can emit both with
    * a single packet.
    */
   pipeline->emit_stencil_cfg[0] = true;
   if (!needs_front_and_back) {
      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
                              true, true, &ds_info->front);
   } else {
      pipeline->emit_stencil_cfg[1] = true;
      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
                              true, false, &ds_info->front);
      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
                              false, true, &ds_info->back);
   }
}

void
v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
                          const VkPipelineColorBlendStateCreateInfo *cb_info,
                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
                          const VkPipelineRasterizationStateCreateInfo *rs_info,
                          const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
                          const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   pack_blend(pipeline, cb_info);
   pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ms_info);
   pack_stencil_cfg(pipeline, ds_info);
}

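/* Prepacks the GL_SHADER_STATE_RECORD with everything that does not depend
 * on BO addresses or dynamic state; the remaining fields are filled in
 * later, once we have the job.
 */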
static void
pack_shader_state_record(struct v3dv_pipeline *pipeline)
{
   assert(sizeof(pipeline->shader_state_record) ==
          cl_packet_length(GL_SHADER_STATE_RECORD));

   struct v3d_fs_prog_data *prog_data_fs =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;

   struct v3d_vs_prog_data *prog_data_vs =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;

   struct v3d_vs_prog_data *prog_data_vs_bin =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;

   /* Note: we are not packing addresses, as we need the job for that (see
    * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
    * point, as they depend on dynamic info that can be set after pipeline
    * creation (like the viewport). Those will be filled in later, so we are
    * doing a partial prepacking here.
    */
   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
      shader.enable_clipping = true;

      if (!pipeline->has_gs) {
         shader.point_size_in_shaded_vertex_data =
            pipeline->topology == PIPE_PRIM_POINTS;
      } else {
         struct v3d_gs_prog_data *prog_data_gs =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
         shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
      }

      /* Must be set if the shader modifies Z, discards, or modifies
       * the sample mask.  For any of these cases, the fragment
       * shader needs to write the Z value (even just discards).
       */
      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
      /* Set if the EZ test must be disabled (due to shader side
       * effects and the early_z flag not being present in the
       * shader).
       */
      shader.turn_off_early_z_test = prog_data_fs->disable_ez;

      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
         prog_data_fs->uses_center_w;

      /* The description for gl_SampleID states that if a fragment shader reads
       * it, then we should automatically activate per-sample shading. However,
       * the Vulkan spec also states that if a framebuffer has no attachments:
       *
       *    "The subpass continues to use the width, height, and layers of the
       *     framebuffer to define the dimensions of the rendering area, and the
       *     rasterizationSamples from each pipeline’s
       *     VkPipelineMultisampleStateCreateInfo to define the number of
       *     samples used in rasterization."
       *
       * So in this scenario, if the pipeline doesn't enable multiple samples
       * but the fragment shader accesses gl_SampleID we would be requested
       * to do per-sample shading in single sample rasterization mode, which
       * is pointless, so just disable it in that case.
       */
      shader.enable_sample_rate_shading =
         pipeline->sample_rate_shading ||
         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);

      shader.any_shader_reads_hardware_written_primitive_id = false;

      shader.do_scoreboard_wait_on_first_thread_switch =
         prog_data_fs->lock_scoreboard_on_first_thrsw;
      shader.disable_implicit_point_line_varyings =
         !prog_data_fs->uses_implicit_point_line_varyings;

      shader.number_of_varyings_in_fragment_shader =
         prog_data_fs->num_inputs;

      shader.coordinate_shader_propagate_nans = true;
      shader.vertex_shader_propagate_nans = true;
      shader.fragment_shader_propagate_nans = true;

      /* Note: see previous note about addresses */
      /* shader.coordinate_shader_code_address */
      /* shader.vertex_shader_code_address */
      /* shader.fragment_shader_code_address */

      /* FIXME: Use combined input/output size flag in the common case (also
       * on v3d, see v3dx_draw).
       */
      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
         prog_data_vs_bin->separate_segments;
      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
         prog_data_vs->separate_segments;

      shader.coordinate_shader_input_vpm_segment_size =
         prog_data_vs_bin->separate_segments ?
         prog_data_vs_bin->vpm_input_size : 1;
      shader.vertex_shader_input_vpm_segment_size =
         prog_data_vs->separate_segments ?
         prog_data_vs->vpm_input_size : 1;

      shader.coordinate_shader_output_vpm_segment_size =
         prog_data_vs_bin->vpm_output_size;
      shader.vertex_shader_output_vpm_segment_size =
         prog_data_vs->vpm_output_size;

      /* Note: see previous note about addresses */
      /* shader.coordinate_shader_uniforms_address */
      /* shader.vertex_shader_uniforms_address */
      /* shader.fragment_shader_uniforms_address */

      shader.min_coord_shader_input_segments_required_in_play =
         pipeline->vpm_cfg_bin.As;
      shader.min_vertex_shader_input_segments_required_in_play =
         pipeline->vpm_cfg.As;

      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
         pipeline->vpm_cfg_bin.Ve;
      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
         pipeline->vpm_cfg.Ve;

      shader.coordinate_shader_4_way_threadable =
         prog_data_vs_bin->base.threads == 4;
      shader.vertex_shader_4_way_threadable =
         prog_data_vs->base.threads == 4;
      shader.fragment_shader_4_way_threadable =
         prog_data_fs->base.threads == 4;

      shader.coordinate_shader_start_in_final_thread_section =
         prog_data_vs_bin->base.single_seg;
      shader.vertex_shader_start_in_final_thread_section =
         prog_data_vs->base.single_seg;
      shader.fragment_shader_start_in_final_thread_section =
         prog_data_fs->base.single_seg;

      shader.vertex_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_vid;
      shader.base_instance_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_biid;
      shader.instance_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_iid;
      shader.vertex_id_read_by_vertex_shader =
         prog_data_vs->uses_vid;
      shader.base_instance_id_read_by_vertex_shader =
         prog_data_vs->uses_biid;
      shader.instance_id_read_by_vertex_shader =
         prog_data_vs->uses_iid;

      /* Note: see previous note about addresses */
      /* shader.address_of_default_attribute_values */
   }
}

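/* The VCM_CACHE_SIZE packet specifies how many 16-vertex batches are kept in
 * the vertex cache for the binning and rendering passes; we take the Vc
 * values computed as part of the VPM configuration.
 */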
static void
pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
{
   assert(sizeof(pipeline->vcm_cache_size) ==
          cl_packet_length(VCM_CACHE_SIZE));

   v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
   }
}

/* Returns the attribute type as defined in the GL_SHADER_STATE_ATTRIBUTE_RECORD packet */
static uint8_t
get_attr_type(const struct util_format_description *desc)
{
   uint32_t r_size = desc->channel[0].size;
   uint8_t attr_type = ATTRIBUTE_FLOAT;

   switch (desc->channel[0].type) {
   case UTIL_FORMAT_TYPE_FLOAT:
      if (r_size == 32) {
         attr_type = ATTRIBUTE_FLOAT;
      } else {
         assert(r_size == 16);
         attr_type = ATTRIBUTE_HALF_FLOAT;
      }
      break;

   case UTIL_FORMAT_TYPE_SIGNED:
   case UTIL_FORMAT_TYPE_UNSIGNED:
      switch (r_size) {
      case 32:
         attr_type = ATTRIBUTE_INT;
         break;
      case 16:
         attr_type = ATTRIBUTE_SHORT;
         break;
      case 10:
         attr_type = ATTRIBUTE_INT2_10_10_10;
         break;
      case 8:
         attr_type = ATTRIBUTE_BYTE;
         break;
      default:
         fprintf(stderr,
                 "format %s unsupported\n",
                 desc->name);
         attr_type = ATTRIBUTE_BYTE;
         abort();
      }
      break;

   default:
      fprintf(stderr,
              "format %s unsupported\n",
              desc->name);
      abort();
   }

   return attr_type;
}

static void
pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
                                   uint32_t index,
                                   const VkVertexInputAttributeDescription *vi_desc)
{
   const uint32_t packet_length =
      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);

   const struct util_format_description *desc =
      vk_format_description(vi_desc->format);

   uint32_t binding = vi_desc->binding;

   v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
             GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {

      /* vec_size == 0 means 4 */
      attr.vec_size = desc->nr_channels & 3;
      attr.signed_int_type = (desc->channel[0].type ==
                              UTIL_FORMAT_TYPE_SIGNED);
      attr.normalized_int_type = desc->channel[0].normalized;
      attr.read_as_int_uint = desc->channel[0].pure_integer;

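      /* Note: the instance divisor is clamped to 0xffff here; the attribute
       * record field presumably can't hold larger values.
       */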
      attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
                                   0xffff);
      attr.stride = pipeline->vb[binding].stride;
      attr.type = get_attr_type(desc);
   }
}

void
v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
                                  const VkPipelineVertexInputStateCreateInfo *vi_info,
                                  const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
{
   pack_shader_state_record(pipeline);
   pack_vcm_cache_size(pipeline);

   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

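      /* VkVertexInputRate happens to match the divisor values we want:
       * VK_VERTEX_INPUT_RATE_VERTEX is 0 (per-vertex data) and
       * VK_VERTEX_INPUT_RATE_INSTANCE is 1 (advance once per instance).
       * Divisors from VK_EXT_vertex_attribute_divisor, if provided,
       * override this below.
       */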
      pipeline->vb[desc->binding].stride = desc->stride;
      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
   }

   if (vd_info) {
      for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *desc =
            &vd_info->pVertexBindingDivisors[i];

         pipeline->vb[desc->binding].instance_divisor = desc->divisor;
      }
   }

   pipeline->va_count = 0;
   struct v3d_vs_prog_data *prog_data_vs =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;

   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
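      /* Shader vertex inputs use VERT_ATTRIB_GENERIC0-based slots, so offset
       * the API location before indexing driver_location_map.
       */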
      uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;

      /* We use a custom driver_location_map instead of
       * nir_find_variable_with_location because if we were able to get the
       * shader variant from the cache, we would not have the nir shader
       * available.
       */
      uint32_t driver_location =
         prog_data_vs->driver_location_map[location];

      if (driver_location != -1) {
         assert(driver_location < MAX_VERTEX_ATTRIBS);
         pipeline->va[driver_location].offset = desc->offset;
         pipeline->va[driver_location].binding = desc->binding;
         pipeline->va[driver_location].vk_format = desc->format;

         pack_shader_state_attribute_record(pipeline, driver_location, desc);

         pipeline->va_count++;
      }
   }
}