1 /*
2  * Copyright © 2019 Valve Corporation.
3  * Copyright © 2016 Red Hat.
4  * Copyright © 2016 Bas Nieuwenhuizen
5  *
6  * based in part on anv driver which is:
7  * Copyright © 2015 Intel Corporation
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26  * IN THE SOFTWARE.
27  */
28 
29 #include "radv_shader_args.h"
30 #include "radv_private.h"
31 #include "radv_shader.h"
32 
33 static void
set_loc(struct radv_userdata_info * ud_info,uint8_t * sgpr_idx,uint8_t num_sgprs)34 set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
35 {
36    ud_info->sgpr_idx = *sgpr_idx;
37    ud_info->num_sgprs = num_sgprs;
38    *sgpr_idx += num_sgprs;
39 }
40 
41 static void
set_loc_shader(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx,uint8_t num_sgprs)42 set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
43 {
44    struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
45    assert(ud_info);
46 
47    set_loc(ud_info, sgpr_idx, num_sgprs);
48 }
49 
50 static void
set_loc_shader_ptr(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)51 set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
52 {
53    bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
54 
55    set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
56 }
57 
58 static void
set_loc_desc(struct radv_shader_args * args,int idx,uint8_t * sgpr_idx)59 set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
60 {
61    struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
62    struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
63    assert(ud_info);
64 
65    set_loc(ud_info, sgpr_idx, 1);
66 
67    locs->descriptor_sets_enabled |= 1u << idx;
68 }
69 
/* User SGPR budget computed by allocate_user_sgprs() and consumed when the
 * global input SGPRs are declared.
 */
struct user_sgpr_info {
   /* Set when there are fewer free user SGPRs than used descriptor sets; the
    * sets are then reached through a single indirect pointer.
    */
   bool indirect_all_descriptor_sets;

   /* User SGPRs left over once the descriptor sets have been accounted for. */
   uint8_t remaining_sgprs;

   /* Number of push constants passed inline in user SGPRs. */
   unsigned num_inline_push_consts;

   /* Set when every used push constant is inlined and the shader does not
    * load dynamic offsets.
    */
   bool inlined_all_push_consts;
};
76 
77 static bool
needs_view_index_sgpr(struct radv_shader_args * args,gl_shader_stage stage)78 needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
79 {
80    switch (stage) {
81    case MESA_SHADER_VERTEX:
82       if (args->shader_info->uses_view_index ||
83           (!args->shader_info->vs.as_es && !args->shader_info->vs.as_ls &&
84            args->options->key.has_multiview_view_index))
85          return true;
86       break;
87    case MESA_SHADER_TESS_EVAL:
88       if (args->shader_info->uses_view_index ||
89           (!args->shader_info->tes.as_es && args->options->key.has_multiview_view_index))
90          return true;
91       break;
92    case MESA_SHADER_TESS_CTRL:
93       if (args->shader_info->uses_view_index)
94          return true;
95       break;
96    case MESA_SHADER_GEOMETRY:
97       if (args->shader_info->uses_view_index ||
98           (args->shader_info->is_ngg && args->options->key.has_multiview_view_index))
99          return true;
100       break;
101    default:
102       break;
103    }
104    return false;
105 }
106 
107 static uint8_t
count_vs_user_sgprs(struct radv_shader_args * args)108 count_vs_user_sgprs(struct radv_shader_args *args)
109 {
110    uint8_t count = 1; /* vertex offset */
111 
112    if (args->shader_info->vs.vb_desc_usage_mask)
113       count++;
114    if (args->shader_info->vs.needs_draw_id)
115       count++;
116    if (args->shader_info->vs.needs_base_instance)
117       count++;
118 
119    return count;
120 }
121 
122 static unsigned
count_ngg_sgprs(struct radv_shader_args * args,bool has_api_gs)123 count_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
124 {
125    unsigned count = 0;
126 
127    if (has_api_gs)
128       count += 1; /* ngg_gs_state */
129    if (args->shader_info->has_ngg_culling)
130       count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
131 
132    return count;
133 }
134 
135 static void
allocate_inline_push_consts(struct radv_shader_args * args,struct user_sgpr_info * user_sgpr_info)136 allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
137 {
138    uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
139 
140    /* Only supported if shaders use push constants. */
141    if (args->shader_info->min_push_constant_used == UINT8_MAX)
142       return;
143 
144    /* Only supported if shaders don't have indirect push constants. */
145    if (args->shader_info->has_indirect_push_constants)
146       return;
147 
148    /* Only supported for 32-bit push constants. */
149    if (!args->shader_info->has_only_32bit_push_constants)
150       return;
151 
152    uint8_t num_push_consts =
153       (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;
154 
155    /* Check if the number of user SGPRs is large enough. */
156    if (num_push_consts < remaining_sgprs) {
157       user_sgpr_info->num_inline_push_consts = num_push_consts;
158    } else {
159       user_sgpr_info->num_inline_push_consts = remaining_sgprs;
160    }
161 
162    /* Clamp to the maximum number of allowed inlined push constants. */
163    if (user_sgpr_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
164       user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
165 
166    if (user_sgpr_info->num_inline_push_consts == num_push_consts &&
167        !args->shader_info->loads_dynamic_offsets) {
168       /* Disable the default push constants path if all constants are
169        * inlined and if shaders don't use dynamic descriptors.
170        */
171       user_sgpr_info->inlined_all_push_consts = true;
172    }
173 }
174 
175 static void
allocate_user_sgprs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,bool needs_view_index,bool has_api_gs,struct user_sgpr_info * user_sgpr_info)176 allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
177                     gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs,
178                     struct user_sgpr_info *user_sgpr_info)
179 {
180    uint8_t user_sgpr_count = 0;
181 
182    memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
183 
184    /* 2 user sgprs will always be allocated for scratch/rings */
185    user_sgpr_count += 2;
186 
187    /* prolog inputs */
188    if (args->shader_info->vs.has_prolog)
189       user_sgpr_count += 2;
190 
191    switch (stage) {
192    case MESA_SHADER_COMPUTE:
193       if (args->shader_info->cs.uses_sbt)
194          user_sgpr_count += 1;
195       if (args->shader_info->cs.uses_grid_size)
196          user_sgpr_count += 3;
197       if (args->shader_info->cs.uses_ray_launch_size)
198          user_sgpr_count += 3;
199       break;
200    case MESA_SHADER_FRAGMENT:
201       break;
202    case MESA_SHADER_VERTEX:
203       if (!args->is_gs_copy_shader)
204          user_sgpr_count += count_vs_user_sgprs(args);
205       break;
206    case MESA_SHADER_TESS_CTRL:
207       if (has_previous_stage) {
208          if (previous_stage == MESA_SHADER_VERTEX)
209             user_sgpr_count += count_vs_user_sgprs(args);
210       }
211       break;
212    case MESA_SHADER_TESS_EVAL:
213       break;
214    case MESA_SHADER_GEOMETRY:
215       if (has_previous_stage) {
216          if (args->shader_info->is_ngg)
217             user_sgpr_count += count_ngg_sgprs(args, has_api_gs);
218 
219          if (previous_stage == MESA_SHADER_VERTEX) {
220             user_sgpr_count += count_vs_user_sgprs(args);
221          }
222       }
223       break;
224    default:
225       break;
226    }
227 
228    if (needs_view_index)
229       user_sgpr_count++;
230 
231    if (args->shader_info->loads_push_constants)
232       user_sgpr_count++;
233 
234    if (args->shader_info->so.num_outputs)
235       user_sgpr_count++;
236 
237    uint32_t available_sgprs =
238       args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
239    uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
240    uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);
241 
242    if (remaining_sgprs < num_desc_set) {
243       user_sgpr_info->indirect_all_descriptor_sets = true;
244       user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
245    } else {
246       user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
247    }
248 
249    allocate_inline_push_consts(args, user_sgpr_info);
250 }
251 
252 static void
declare_global_input_sgprs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info)253 declare_global_input_sgprs(struct radv_shader_args *args,
254                            const struct user_sgpr_info *user_sgpr_info)
255 {
256    /* 1 for each descriptor set */
257    if (!user_sgpr_info->indirect_all_descriptor_sets) {
258       uint32_t mask = args->shader_info->desc_set_used_mask;
259 
260       while (mask) {
261          int i = u_bit_scan(&mask);
262 
263          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
264       }
265    } else {
266       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
267    }
268 
269    if (args->shader_info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
270       /* 1 for push constants and dynamic descriptors */
271       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
272    }
273 
274    for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) {
275       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
276    }
277    args->ac.base_inline_push_consts = args->shader_info->min_push_constant_used / 4;
278 
279    if (args->shader_info->so.num_outputs) {
280       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
281    }
282 }
283 
284 static void
declare_vs_specific_input_sgprs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage)285 declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
286                                 bool has_previous_stage, gl_shader_stage previous_stage)
287 {
288    if (args->shader_info->vs.has_prolog)
289       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);
290 
291    if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
292                                     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
293       if (args->shader_info->vs.vb_desc_usage_mask) {
294          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
295       }
296       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
297       if (args->shader_info->vs.needs_draw_id) {
298          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
299       }
300       if (args->shader_info->vs.needs_base_instance) {
301          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
302       }
303    }
304 }
305 
306 static void
declare_vs_input_vgprs(struct radv_shader_args * args)307 declare_vs_input_vgprs(struct radv_shader_args *args)
308 {
309    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
310    if (!args->is_gs_copy_shader) {
311       if (args->shader_info->vs.as_ls) {
312          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
313          if (args->options->chip_class >= GFX10) {
314             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
315             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
316          } else {
317             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
318             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
319          }
320       } else {
321          if (args->options->chip_class >= GFX10) {
322             if (args->shader_info->is_ngg) {
323                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
324                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
325                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
326             } else {
327                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
328                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
329                ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
330             }
331          } else {
332             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
333             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
334             ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
335          }
336       }
337    }
338 
339    if (args->shader_info->vs.dynamic_inputs) {
340       assert(args->shader_info->vs.use_per_attribute_vb_descs);
341       unsigned num_attributes = util_last_bit(args->shader_info->vs.vb_desc_usage_mask);
342       for (unsigned i = 0; i < num_attributes; i++)
343          ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
344       /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
345        * VGPR more than the number of shader arguments in the case of non-trivial divisors on GFX8.
346        */
347       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
348    }
349 }
350 
351 static void
declare_streamout_sgprs(struct radv_shader_args * args,gl_shader_stage stage)352 declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
353 {
354    int i;
355 
356    /* Streamout SGPRs. */
357    if (args->shader_info->so.num_outputs) {
358       assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
359 
360       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
361       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
362    } else if (stage == MESA_SHADER_TESS_EVAL) {
363       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
364    }
365 
366    /* A streamout buffer offset is loaded if the stride is non-zero. */
367    for (i = 0; i < 4; i++) {
368       if (!args->shader_info->so.strides[i])
369          continue;
370 
371       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
372    }
373 }
374 
375 static void
declare_tes_input_vgprs(struct radv_shader_args * args)376 declare_tes_input_vgprs(struct radv_shader_args *args)
377 {
378    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
379    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
380    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
381    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
382 }
383 
384 static void
declare_ps_input_vgprs(struct radv_shader_args * args)385 declare_ps_input_vgprs(struct radv_shader_args *args)
386 {
387    unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;
388 
389    ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
390    ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
391    ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
392    ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
393    ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
394    ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
395    ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
396    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
397    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
398    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
399    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
400    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
401    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
402    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
403    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
404    ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
405 
406    if (args->options->remap_spi_ps_input) {
407       /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
408        * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
409        * VGPR arguments here.
410        */
411       unsigned arg_count = 0;
412       for (unsigned i = 0, vgpr_arg = 0, vgpr_reg = 0; i < args->ac.arg_count; i++) {
413          if (args->ac.args[i].file != AC_ARG_VGPR) {
414             arg_count++;
415             continue;
416          }
417 
418          if (!(spi_ps_input & (1 << vgpr_arg))) {
419             args->ac.args[i].skip = true;
420          } else {
421             args->ac.args[i].offset = vgpr_reg;
422             vgpr_reg += args->ac.args[i].size;
423             arg_count++;
424          }
425          vgpr_arg++;
426       }
427    }
428 }
429 
430 static void
declare_ngg_sgprs(struct radv_shader_args * args,bool has_api_gs)431 declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
432 {
433    if (has_api_gs) {
434       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
435    }
436 
437    if (args->shader_info->has_ngg_culling) {
438       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
439       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
440       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
441       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[0]);
442       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_translate[1]);
443    }
444 }
445 
446 static void
set_global_input_locs(struct radv_shader_args * args,const struct user_sgpr_info * user_sgpr_info,uint8_t * user_sgpr_idx)447 set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
448                       uint8_t *user_sgpr_idx)
449 {
450    unsigned num_inline_push_consts = 0;
451 
452    if (!user_sgpr_info->indirect_all_descriptor_sets) {
453       for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
454          if (args->descriptor_sets[i].used)
455             set_loc_desc(args, i, user_sgpr_idx);
456       }
457    } else {
458       set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
459    }
460 
461    if (args->ac.push_constants.used) {
462       set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
463    }
464 
465    for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) {
466       if (args->ac.inline_push_consts[i].used)
467          num_inline_push_consts++;
468    }
469 
470    if (num_inline_push_consts) {
471       set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
472    }
473 
474    if (args->streamout_buffers.used) {
475       set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
476    }
477 }
478 
479 static void
set_vs_specific_input_locs(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage,uint8_t * user_sgpr_idx)480 set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
481                            bool has_previous_stage, gl_shader_stage previous_stage,
482                            uint8_t *user_sgpr_idx)
483 {
484    if (args->prolog_inputs.used)
485       set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);
486 
487    if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
488                                     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
489       if (args->ac.vertex_buffers.used) {
490          set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
491       }
492 
493       unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
494                         args->ac.start_instance.used;
495       set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
496    }
497 }
498 
499 /* Returns whether the stage is a stage that can be directly before the GS */
500 static bool
is_pre_gs_stage(gl_shader_stage stage)501 is_pre_gs_stage(gl_shader_stage stage)
502 {
503    return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
504 }
505 
506 void
radv_declare_shader_args(struct radv_shader_args * args,gl_shader_stage stage,bool has_previous_stage,gl_shader_stage previous_stage)507 radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
508                          bool has_previous_stage, gl_shader_stage previous_stage)
509 {
510    struct user_sgpr_info user_sgpr_info;
511    bool needs_view_index = needs_view_index_sgpr(args, stage);
512    bool has_api_gs = stage == MESA_SHADER_GEOMETRY;
513 
514    if (args->options->chip_class >= GFX10) {
515       if (is_pre_gs_stage(stage) && args->shader_info->is_ngg) {
516          /* On GFX10, VS is merged into GS for NGG. */
517          previous_stage = stage;
518          stage = MESA_SHADER_GEOMETRY;
519          has_previous_stage = true;
520       }
521    }
522 
523    for (int i = 0; i < MAX_SETS; i++)
524       args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
525    for (int i = 0; i < AC_UD_MAX_UD; i++)
526       args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
527 
528    allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
529                        has_api_gs, &user_sgpr_info);
530 
531    if (args->options->explicit_scratch_args) {
532       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
533    }
534 
535    /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
536     * sgprs.
537     */
538 
539    switch (stage) {
540    case MESA_SHADER_COMPUTE:
541       declare_global_input_sgprs(args, &user_sgpr_info);
542 
543       if (args->shader_info->cs.uses_sbt) {
544          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors);
545       }
546 
547       if (args->shader_info->cs.uses_grid_size) {
548          ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
549       }
550 
551       if (args->shader_info->cs.uses_ray_launch_size) {
552          ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
553       }
554 
555       for (int i = 0; i < 3; i++) {
556          if (args->shader_info->cs.uses_block_id[i]) {
557             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
558          }
559       }
560 
561       if (args->shader_info->cs.uses_local_invocation_idx) {
562          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
563       }
564 
565       if (args->options->explicit_scratch_args) {
566          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
567       }
568 
569       ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
570       break;
571    case MESA_SHADER_VERTEX:
572       /* NGG is handled by the GS case */
573       assert(!args->shader_info->is_ngg);
574 
575       declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
576 
577       declare_global_input_sgprs(args, &user_sgpr_info);
578 
579       if (needs_view_index) {
580          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
581       }
582 
583       if (args->shader_info->vs.as_es) {
584          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
585       } else if (args->shader_info->vs.as_ls) {
586          /* no extra parameters */
587       } else {
588          declare_streamout_sgprs(args, stage);
589       }
590 
591       if (args->options->explicit_scratch_args) {
592          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
593       }
594 
595       declare_vs_input_vgprs(args);
596       break;
597    case MESA_SHADER_TESS_CTRL:
598       if (has_previous_stage) {
599          // First 6 system regs
600          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
601          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
602          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
603 
604          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
605          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
606          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
607 
608          declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
609 
610          declare_global_input_sgprs(args, &user_sgpr_info);
611 
612          if (needs_view_index) {
613             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
614          }
615 
616          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
617          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
618 
619          declare_vs_input_vgprs(args);
620       } else {
621          declare_global_input_sgprs(args, &user_sgpr_info);
622 
623          if (needs_view_index) {
624             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
625          }
626 
627          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
628          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
629          if (args->options->explicit_scratch_args) {
630             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
631          }
632          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
633          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
634       }
635       break;
636    case MESA_SHADER_TESS_EVAL:
637       /* NGG is handled by the GS case */
638       assert(!args->shader_info->is_ngg);
639 
640       declare_global_input_sgprs(args, &user_sgpr_info);
641 
642       if (needs_view_index)
643          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
644 
645       if (args->shader_info->tes.as_es) {
646          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
647          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
648          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
649       } else {
650          declare_streamout_sgprs(args, stage);
651          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
652       }
653       if (args->options->explicit_scratch_args) {
654          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
655       }
656       declare_tes_input_vgprs(args);
657       break;
658    case MESA_SHADER_GEOMETRY:
659       if (has_previous_stage) {
660          // First 6 system regs
661          if (args->shader_info->is_ngg) {
662             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
663          } else {
664             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
665          }
666 
667          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
668          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
669 
670          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
671          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
672          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
673 
674          if (previous_stage != MESA_SHADER_TESS_EVAL) {
675             declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
676          }
677 
678          declare_global_input_sgprs(args, &user_sgpr_info);
679 
680          if (needs_view_index) {
681             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
682          }
683 
684          if (args->shader_info->is_ngg) {
685             declare_ngg_sgprs(args, has_api_gs);
686          }
687 
688          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
689          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
690          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
691          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
692          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
693 
694          if (previous_stage == MESA_SHADER_VERTEX) {
695             declare_vs_input_vgprs(args);
696          } else {
697             declare_tes_input_vgprs(args);
698          }
699       } else {
700          declare_global_input_sgprs(args, &user_sgpr_info);
701 
702          if (needs_view_index) {
703             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
704          }
705 
706          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
707          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
708          if (args->options->explicit_scratch_args) {
709             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
710          }
711          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
712          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
713          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
714          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
715          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
716          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
717          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
718          ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
719       }
720       break;
721    case MESA_SHADER_FRAGMENT:
722       declare_global_input_sgprs(args, &user_sgpr_info);
723 
724       ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
725       if (args->options->explicit_scratch_args) {
726          ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
727       }
728 
729       declare_ps_input_vgprs(args);
730       break;
731    default:
732       unreachable("Shader stage not implemented");
733    }
734 
735    args->shader_info->num_input_vgprs = 0;
736    args->shader_info->num_input_sgprs = 2;
737    args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
738    args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
739 
740    uint8_t user_sgpr_idx = 0;
741 
742    set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
743 
744    /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
745     * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
746    if (has_previous_stage)
747       user_sgpr_idx = 0;
748 
749    if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
750       set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
751 
752    set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
753 
754    switch (stage) {
755    case MESA_SHADER_COMPUTE:
756       if (args->ac.sbt_descriptors.used) {
757          set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
758       }
759       if (args->ac.num_work_groups.used) {
760          set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
761       }
762       if (args->ac.ray_launch_size.used) {
763          set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
764       }
765       break;
766    case MESA_SHADER_VERTEX:
767       if (args->ac.view_index.used)
768          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
769       break;
770    case MESA_SHADER_TESS_CTRL:
771       if (args->ac.view_index.used)
772          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
773       break;
774    case MESA_SHADER_TESS_EVAL:
775       if (args->ac.view_index.used)
776          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
777       break;
778    case MESA_SHADER_GEOMETRY:
779       if (args->ac.view_index.used)
780          set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
781 
782       if (args->ngg_gs_state.used) {
783          set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
784       }
785 
786       if (args->ngg_culling_settings.used) {
787          set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
788       }
789 
790       if (args->ngg_viewport_scale[0].used) {
791          assert(args->ngg_viewport_scale[1].used &&
792                 args->ngg_viewport_translate[0].used &&
793                 args->ngg_viewport_translate[1].used);
794          set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
795       }
796       break;
797    case MESA_SHADER_FRAGMENT:
798       break;
799    default:
800       unreachable("Shader stage not implemented");
801    }
802 
803    args->shader_info->num_user_sgprs = user_sgpr_idx;
804 }
805