1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir_xfb_info.h"
25
26 #include <util/u_math.h>
27
28 static void
add_var_xfb_varying(nir_xfb_info * xfb,nir_xfb_varyings_info * varyings,unsigned buffer,unsigned offset,const struct glsl_type * type)29 add_var_xfb_varying(nir_xfb_info *xfb,
30 nir_xfb_varyings_info *varyings,
31 unsigned buffer,
32 unsigned offset,
33 const struct glsl_type *type)
34 {
35 if (varyings == NULL)
36 return;
37
38 nir_xfb_varying_info *varying = &varyings->varyings[varyings->varying_count++];
39
40 varying->type = type;
41 varying->buffer = buffer;
42 varying->offset = offset;
43 xfb->buffers[buffer].varying_count++;
44 }
45
46
47 static nir_xfb_info *
nir_xfb_info_create(void * mem_ctx,uint16_t output_count)48 nir_xfb_info_create(void *mem_ctx, uint16_t output_count)
49 {
50 return rzalloc_size(mem_ctx, nir_xfb_info_size(output_count));
51 }
52
53 static size_t
nir_xfb_varyings_info_size(uint16_t varying_count)54 nir_xfb_varyings_info_size(uint16_t varying_count)
55 {
56 return sizeof(nir_xfb_info) + sizeof(nir_xfb_varying_info) * varying_count;
57 }
58
59 static nir_xfb_varyings_info *
nir_xfb_varyings_info_create(void * mem_ctx,uint16_t varying_count)60 nir_xfb_varyings_info_create(void *mem_ctx, uint16_t varying_count)
61 {
62 return rzalloc_size(mem_ctx, nir_xfb_varyings_info_size(varying_count));
63 }
64
65 static void
add_var_xfb_outputs(nir_xfb_info * xfb,nir_xfb_varyings_info * varyings,nir_variable * var,unsigned buffer,unsigned * location,unsigned * offset,const struct glsl_type * type,bool varying_added)66 add_var_xfb_outputs(nir_xfb_info *xfb,
67 nir_xfb_varyings_info *varyings,
68 nir_variable *var,
69 unsigned buffer,
70 unsigned *location,
71 unsigned *offset,
72 const struct glsl_type *type,
73 bool varying_added)
74 {
75 /* If this type contains a 64-bit value, align to 8 bytes */
76 if (glsl_type_contains_64bit(type))
77 *offset = ALIGN_POT(*offset, 8);
78
79 if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
80 unsigned length = glsl_get_length(type);
81
82 const struct glsl_type *child_type = glsl_get_array_element(type);
83 if (!glsl_type_is_array(child_type) &&
84 !glsl_type_is_struct(child_type)) {
85
86 add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
87 varying_added = true;
88 }
89
90 for (unsigned i = 0; i < length; i++)
91 add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
92 child_type, varying_added);
93 } else if (glsl_type_is_struct_or_ifc(type)) {
94 unsigned length = glsl_get_length(type);
95 for (unsigned i = 0; i < length; i++) {
96 const struct glsl_type *child_type = glsl_get_struct_field(type, i);
97 add_var_xfb_outputs(xfb, varyings, var, buffer, location, offset,
98 child_type, varying_added);
99 }
100 } else {
101 assert(buffer < NIR_MAX_XFB_BUFFERS);
102 if (xfb->buffers_written & (1 << buffer)) {
103 assert(xfb->buffers[buffer].stride == var->data.xfb.stride);
104 assert(xfb->buffer_to_stream[buffer] == var->data.stream);
105 } else {
106 xfb->buffers_written |= (1 << buffer);
107 xfb->buffers[buffer].stride = var->data.xfb.stride;
108 xfb->buffer_to_stream[buffer] = var->data.stream;
109 }
110
111 assert(var->data.stream < NIR_MAX_XFB_STREAMS);
112 xfb->streams_written |= (1 << var->data.stream);
113
114 unsigned comp_slots;
115 if (var->data.compact) {
116 /* This only happens for clip/cull which are float arrays */
117 assert(glsl_without_array(type) == glsl_float_type());
118 assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
119 var->data.location == VARYING_SLOT_CLIP_DIST1);
120 comp_slots = glsl_get_length(type);
121 } else {
122 comp_slots = glsl_get_component_slots(type);
123
124 UNUSED unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
125 assert(attrib_slots == glsl_count_attribute_slots(type, false));
126
127 /* Ensure that we don't have, for instance, a dvec2 with a
128 * location_frac of 2 which would make it crass a location boundary
129 * even though it fits in a single slot. However, you can have a
130 * dvec3 which crosses the slot boundary with a location_frac of 2.
131 */
132 assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
133 attrib_slots);
134 }
135
136 assert(var->data.location_frac + comp_slots <= 8);
137 uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
138 unsigned comp_offset = var->data.location_frac;
139
140 if (!varying_added) {
141 add_var_xfb_varying(xfb, varyings, buffer, *offset, type);
142 }
143
144 while (comp_mask) {
145 nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
146
147 output->buffer = buffer;
148 output->offset = *offset;
149 output->location = *location;
150 output->component_mask = comp_mask & 0xf;
151 output->component_offset = comp_offset;
152
153 *offset += util_bitcount(output->component_mask) * 4;
154 (*location)++;
155 comp_mask >>= 4;
156 comp_offset = 0;
157 }
158 }
159 }
160
161 static int
compare_xfb_varying_offsets(const void * _a,const void * _b)162 compare_xfb_varying_offsets(const void *_a, const void *_b)
163 {
164 const nir_xfb_varying_info *a = _a, *b = _b;
165
166 if (a->buffer != b->buffer)
167 return a->buffer - b->buffer;
168
169 return a->offset - b->offset;
170 }
171
172 static int
compare_xfb_output_offsets(const void * _a,const void * _b)173 compare_xfb_output_offsets(const void *_a, const void *_b)
174 {
175 const nir_xfb_output_info *a = _a, *b = _b;
176
177 return a->offset - b->offset;
178 }
179
180 nir_xfb_info *
nir_gather_xfb_info(const nir_shader * shader,void * mem_ctx)181 nir_gather_xfb_info(const nir_shader *shader, void *mem_ctx)
182 {
183 return nir_gather_xfb_info_with_varyings(shader, mem_ctx, NULL);
184 }
185
186 nir_xfb_info *
nir_gather_xfb_info_with_varyings(const nir_shader * shader,void * mem_ctx,nir_xfb_varyings_info ** varyings_info_out)187 nir_gather_xfb_info_with_varyings(const nir_shader *shader,
188 void *mem_ctx,
189 nir_xfb_varyings_info **varyings_info_out)
190 {
191 assert(shader->info.stage == MESA_SHADER_VERTEX ||
192 shader->info.stage == MESA_SHADER_TESS_EVAL ||
193 shader->info.stage == MESA_SHADER_GEOMETRY);
194
195 /* Compute the number of outputs we have. This is simply the number of
196 * cumulative locations consumed by all the variables. If a location is
197 * represented by multiple variables, then they each count separately in
198 * number of outputs. This is only an estimate as some variables may have
199 * an xfb_buffer but not an output so it may end up larger than we need but
200 * it should be good enough for allocation.
201 */
202 unsigned num_outputs = 0;
203 unsigned num_varyings = 0;
204 nir_xfb_varyings_info *varyings_info = NULL;
205 nir_foreach_shader_out_variable(var, shader) {
206 if (var->data.explicit_xfb_buffer) {
207 num_outputs += glsl_count_attribute_slots(var->type, false);
208 num_varyings += glsl_varying_count(var->type);
209 }
210 }
211 if (num_outputs == 0 || num_varyings == 0)
212 return NULL;
213
214 nir_xfb_info *xfb = nir_xfb_info_create(mem_ctx, num_outputs);
215 if (varyings_info_out != NULL) {
216 *varyings_info_out = nir_xfb_varyings_info_create(mem_ctx, num_varyings);
217 varyings_info = *varyings_info_out;
218 }
219
220 /* Walk the list of outputs and add them to the array */
221 nir_foreach_shader_out_variable(var, shader) {
222 if (!var->data.explicit_xfb_buffer)
223 continue;
224
225 unsigned location = var->data.location;
226
227 /* In order to know if we have a array of blocks can't be done just by
228 * checking if we have an interface type and is an array, because due
229 * splitting we could end on a case were we received a split struct
230 * that contains an array.
231 */
232 bool is_array_block = var->interface_type != NULL &&
233 glsl_type_is_array(var->type) &&
234 glsl_without_array(var->type) == var->interface_type;
235
236 if (var->data.explicit_offset && !is_array_block) {
237 unsigned offset = var->data.offset;
238 add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer,
239 &location, &offset, var->type, false);
240 } else if (is_array_block) {
241 assert(glsl_type_is_struct_or_ifc(var->interface_type));
242
243 unsigned aoa_size = glsl_get_aoa_size(var->type);
244 const struct glsl_type *itype = var->interface_type;
245 unsigned nfields = glsl_get_length(itype);
246 for (unsigned b = 0; b < aoa_size; b++) {
247 for (unsigned f = 0; f < nfields; f++) {
248 int foffset = glsl_get_struct_field_offset(itype, f);
249 const struct glsl_type *ftype = glsl_get_struct_field(itype, f);
250 if (foffset < 0) {
251 location += glsl_count_attribute_slots(ftype, false);
252 continue;
253 }
254
255 unsigned offset = foffset;
256 add_var_xfb_outputs(xfb, varyings_info, var, var->data.xfb.buffer + b,
257 &location, &offset, ftype, false);
258 }
259 }
260 }
261 }
262
263 /* Everything is easier in the state setup code if outputs and varyings are
264 * sorted in order of output offset (and buffer for varyings).
265 */
266 qsort(xfb->outputs, xfb->output_count, sizeof(xfb->outputs[0]),
267 compare_xfb_output_offsets);
268
269 if (varyings_info != NULL) {
270 qsort(varyings_info->varyings, varyings_info->varying_count,
271 sizeof(varyings_info->varyings[0]),
272 compare_xfb_varying_offsets);
273 }
274
275 #ifndef NDEBUG
276 /* Finally, do a sanity check */
277 unsigned max_offset[NIR_MAX_XFB_BUFFERS] = {0};
278 for (unsigned i = 0; i < xfb->output_count; i++) {
279 assert(xfb->outputs[i].offset >= max_offset[xfb->outputs[i].buffer]);
280 assert(xfb->outputs[i].component_mask != 0);
281 unsigned slots = util_bitcount(xfb->outputs[i].component_mask);
282 max_offset[xfb->outputs[i].buffer] = xfb->outputs[i].offset + slots * 4;
283 }
284 #endif
285
286 return xfb;
287 }
288