/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#ifndef __PAN_SHADER_H__
#define __PAN_SHADER_H__

#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"

#include "pan_device.h"
#include "genxml/gen_macros.h"

struct panfrost_device;

#ifdef PAN_ARCH
const nir_shader_compiler_options *
GENX(pan_shader_get_compiler_options)(void);

void
GENX(pan_shader_compile)(nir_shader *nir,
                         struct panfrost_compile_inputs *inputs,
                         struct util_dynarray *binary,
                         struct pan_shader_info *info);
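
/* Example call sequence (a sketch only; `nir`, `inputs`, `binary` and `info`
 * are hypothetical caller-side names). The NIR is expected to have been built
 * against GENX(pan_shader_get_compiler_options)(), and the resulting binary
 * must be uploaded to GPU-visible memory before descriptors reference it:
 *
 *    struct util_dynarray binary;
 *    struct pan_shader_info info = { 0 };
 *
 *    util_dynarray_init(&binary, NULL);
 *    GENX(pan_shader_compile)(nir, &inputs, &binary, &info);
 */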

#if PAN_ARCH <= 5
static inline void
pan_shader_prepare_midgard_rsd(const struct pan_shader_info *info,
                               struct MALI_RENDERER_STATE *rsd)
{
        /* Midgard pushes uniforms at vec4 granularity: push.count is in
         * 32-bit words, so it must be a multiple of 4 and the descriptor
         * takes the count in vec4 uniform registers */
        assert((info->push.count & 3) == 0);

        rsd->properties.uniform_count = info->push.count / 4;
        rsd->properties.shader_has_side_effects = info->writes_global;
        rsd->properties.fp_mode = MALI_FP_MODE_GL_INF_NAN_ALLOWED;

        /* For fragment shaders, the work register count, early-z enable and
         * tile-buffer reads are set at draw time */

        if (info->stage != MESA_SHADER_FRAGMENT) {
                rsd->properties.work_register_count = info->work_reg_count;
        } else {
                rsd->properties.shader_reads_tilebuffer =
                        info->fs.outputs_read;

                /* However, forcing early-z in the shader overrides the
                 * draw-time setting */
                rsd->properties.force_early_z =
                        info->fs.early_fragment_tests;
        }
}

#else

/* Classify a shader into the following pixel kill categories:
 *
 * (force early, strong early): no side effects/depth/stencil/coverage writes
 *                              (forced by early_fragment_tests)
 * (weak early, weak early): no side effects/depth/stencil/coverage writes
 * (weak early, force late): no side effects/depth/stencil writes
 * (force late, weak early): side effects but no depth/stencil/coverage writes
 * (force late, force early): only run for side effects
 * (force late, force late): depth/stencil writes
 *
 * Note that discard is considered a coverage write. TODO: what about
 * alpha-to-coverage?
 */
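
/* For example, a fragment shader that can discard but has no side effects and
 * writes neither depth nor stencil only modifies coverage, so the
 * classification below maps it to (weak early, force late). */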

#define SET_PIXEL_KILL(kill, update) do { \
        rsd->properties.pixel_kill_operation = MALI_PIXEL_KILL_## kill; \
        rsd->properties.zs_update_operation = MALI_PIXEL_KILL_## update; \
} while(0)

static inline void
pan_shader_classify_pixel_kill_coverage(const struct pan_shader_info *info,
                struct MALI_RENDERER_STATE *rsd)
{
        bool force_early = info->fs.early_fragment_tests;
        bool sidefx = info->writes_global;
        bool coverage = info->fs.writes_coverage || info->fs.can_discard;
        bool depth = info->fs.writes_depth;
        bool stencil = info->fs.writes_stencil;

        rsd->properties.shader_modifies_coverage = coverage;

        if (force_early)
                SET_PIXEL_KILL(FORCE_EARLY, STRONG_EARLY);
        else if (depth || stencil || (sidefx && coverage))
                SET_PIXEL_KILL(FORCE_LATE, FORCE_LATE);
        else if (sidefx)
                SET_PIXEL_KILL(FORCE_LATE, WEAK_EARLY);
        else if (coverage)
                SET_PIXEL_KILL(WEAK_EARLY, FORCE_LATE);
        else
                SET_PIXEL_KILL(WEAK_EARLY, WEAK_EARLY);
}

#undef SET_PIXEL_KILL

static inline void
pan_shader_prepare_bifrost_rsd(const struct pan_shader_info *info,
                               struct MALI_RENDERER_STATE *rsd)
{
        /* push.count is in 32-bit words; each Bifrost FAU (Fast Access
         * Uniform) slot holds a 64-bit pair of them */
        unsigned fau_count = DIV_ROUND_UP(info->push.count, 2);
        rsd->preload.uniform_count = fau_count;

#if PAN_ARCH >= 7
        rsd->properties.shader_register_allocation =
                (info->work_reg_count <= 32) ?
                MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD :
                MALI_SHADER_REGISTER_ALLOCATION_64_PER_THREAD;
#endif

        switch (info->stage) {
        case MESA_SHADER_VERTEX:
                rsd->preload.vertex.vertex_id = true;
                rsd->preload.vertex.instance_id = true;
                break;

        case MESA_SHADER_FRAGMENT:
                pan_shader_classify_pixel_kill_coverage(info, rsd);

#if PAN_ARCH >= 7
                rsd->properties.shader_wait_dependency_6 = info->bifrost.wait_6;
                rsd->properties.shader_wait_dependency_7 = info->bifrost.wait_7;
#endif

                /* Match the mesa/st convention. If this needs to be flipped,
                 * nir_lower_pntc_ytransform will do so. */
                rsd->properties.point_sprite_coord_origin_max_y = true;

                rsd->properties.allow_forward_pixel_to_be_killed =
                        !info->fs.sidefx;

                rsd->preload.fragment.fragment_position = info->fs.reads_frag_coord;
                rsd->preload.fragment.coverage = true;
                rsd->preload.fragment.primitive_flags = info->fs.reads_face;

                /* Contains sample ID and sample mask. Sample position and
                 * helper invocation are expressed in terms of the above, so
                 * preload for those too */
                rsd->preload.fragment.sample_mask_id =
                        info->fs.reads_sample_id |
                        info->fs.reads_sample_pos |
                        info->fs.reads_sample_mask_in |
                        info->fs.reads_helper_invocation |
                        info->fs.sample_shading;

#if PAN_ARCH >= 7
                /* Message preloads (e.g. the first varying or texture
                 * fetches) that the compiler decided can be issued before
                 * the shader starts executing */
                rsd->message_preload_1 = info->bifrost.messages[0];
                rsd->message_preload_2 = info->bifrost.messages[1];
#endif
                break;

        case MESA_SHADER_COMPUTE:
                rsd->preload.compute.local_invocation_xy = true;
                rsd->preload.compute.local_invocation_z = true;
                rsd->preload.compute.work_group_x = true;
                rsd->preload.compute.work_group_y = true;
                rsd->preload.compute.work_group_z = true;
                rsd->preload.compute.global_invocation_x = true;
                rsd->preload.compute.global_invocation_y = true;
                rsd->preload.compute.global_invocation_z = true;
                break;

        default:
                unreachable("TODO");
        }
}

#endif

static inline void
pan_shader_prepare_rsd(const struct pan_shader_info *shader_info,
                       mali_ptr shader_ptr,
                       struct MALI_RENDERER_STATE *rsd)
{
#if PAN_ARCH <= 5
        /* Midgard encodes the tag of the shader's first instruction bundle
         * in the low bits of the shader pointer */
        shader_ptr |= shader_info->midgard.first_tag;
#endif

        rsd->shader.shader = shader_ptr;
        rsd->shader.attribute_count = shader_info->attribute_count;
        rsd->shader.varying_count = shader_info->varyings.input_count +
                                    shader_info->varyings.output_count;
        rsd->shader.texture_count = shader_info->texture_count;
        rsd->shader.sampler_count = shader_info->sampler_count;
        rsd->properties.shader_contains_barrier = shader_info->contains_barrier;
        rsd->properties.uniform_buffer_count = shader_info->ubo_count;

        if (shader_info->stage == MESA_SHADER_FRAGMENT) {
                rsd->properties.shader_contains_barrier |=
                        shader_info->fs.helper_invocations;
                rsd->properties.stencil_from_shader =
                        shader_info->fs.writes_stencil;
                rsd->properties.depth_source =
                        shader_info->fs.writes_depth ?
                        MALI_DEPTH_SOURCE_SHADER :
                        MALI_DEPTH_SOURCE_FIXED_FUNCTION;

                /* This also needs to be set if the API forces per-sample
                 * shading, but that will just get ORed in */
                rsd->multisample_misc.evaluate_per_sample =
                        shader_info->fs.sample_shading;
        }

#if PAN_ARCH >= 6
        pan_shader_prepare_bifrost_rsd(shader_info, rsd);
#else
        pan_shader_prepare_midgard_rsd(shader_info, rsd);
#endif
}
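
/* Example usage (a sketch only; `rsd_cpu`, `shader_gpu_va` and `shader_info`
 * are hypothetical names for a CPU-mapped descriptor, the uploaded binary's
 * GPU address and the compiler-produced metadata). A driver would typically
 * fill the descriptor through the genxml pan_pack() helper, letting this
 * function set the shader-dependent fields, and complete any draw-time
 * fields afterwards:
 *
 *    pan_pack(rsd_cpu, RENDERER_STATE, cfg) {
 *            pan_shader_prepare_rsd(&shader_info, shader_gpu_va, &cfg);
 *    }
 */
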
#endif /* PAN_ARCH */

#endif