/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ir3_nir.h"
#include "ir3_compiler.h"
#include "compiler/nir/nir_builder.h"

struct state {
	uint32_t topology;

	struct primitive_map {
		unsigned loc[32];
		unsigned size[32];
		unsigned stride;
	} map;

	nir_ssa_def *header;

	nir_variable *vertex_count_var;
	nir_variable *emitted_vertex_var;
	nir_variable *vertex_flags_out;

	struct exec_list old_outputs;
	struct exec_list new_outputs;
	struct exec_list emit_outputs;

	/* tess ctrl shader on a650 gets the local primitive id at different bits: */
	unsigned local_primitive_id_start;
};

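/* The hardware-provided wave header packs several small IDs into a single
 * dword.  The helpers below extract individual fields; judging from the
 * extract calls, the layout is presumably: vertex id at bits [10:6],
 * invocation id at bits [15:11], a 6-bit local primitive id (whose start
 * bit can vary, see local_primitive_id_start above), and for GS a 10-bit
 * local thread id at bits [25:16].
 */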
static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
	return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
			nir_imm_int(b, mask));
}

static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
	return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
}

static nir_variable *
get_var(nir_shader *shader, nir_variable_mode mode, int driver_location)
{
	nir_foreach_variable_with_modes (v, shader, mode) {
		if (v->data.driver_location == driver_location) {
			return v;
		}
	}

	return NULL;
}

static bool
is_tess_levels(nir_variable *var)
{
	return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
			var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
}

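/* Compute the byte offset into shared (local) memory for one attribute of
 * one vertex, used when linking the stages that communicate through
 * shared memory:
 *
 *    local_primitive_id * primitive_stride   ; this primitive's chunk
 *  + vertex * vertex_stride                  ; the vertex within it
 *  + attr_offset + offset                    ; attribute slot plus offset
 *
 * On the producer side (VS/DS) the stride and location come from the
 * primitive map we build ourselves; on the consumer side (HS/GS) they
 * arrive as driver params, since they were fixed when the producer
 * variant was compiled.
 */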
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
		nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
{
	nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
	nir_ssa_def *primitive_offset =
		nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
	nir_ssa_def *attr_offset;
	nir_ssa_def *vertex_stride;

	switch (b->shader->info.stage) {
	case MESA_SHADER_VERTEX:
	case MESA_SHADER_TESS_EVAL:
		vertex_stride = nir_imm_int(b, state->map.stride * 4);
		attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
		break;
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_GEOMETRY:
		vertex_stride = nir_load_vs_vertex_stride_ir3(b);
		attr_offset = nir_load_primitive_location_ir3(b, base);
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

	return nir_iadd(b, nir_iadd(b, primitive_offset, vertex_offset),
			nir_iadd(b, attr_offset, offset));
}

static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
		nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1, nir_ssa_def *src2)
{
	nir_intrinsic_instr *new_intr =
		nir_intrinsic_instr_create(b->shader, op);

	new_intr->src[0] = nir_src_for_ssa(src0);
	if (src1)
		new_intr->src[1] = nir_src_for_ssa(src1);
	if (src2)
		new_intr->src[2] = nir_src_for_ssa(src2);

	new_intr->num_components = intr->num_components;

	if (nir_intrinsic_infos[op].has_dest)
		nir_ssa_dest_init(&new_intr->instr, &new_intr->dest,
						  intr->num_components, 32, NULL);

	nir_builder_instr_insert(b, &new_intr->instr);

	if (nir_intrinsic_infos[op].has_dest)
		nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(&new_intr->dest.ssa));

	nir_instr_remove(&intr->instr);

	return new_intr;
}

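/* Lay out the outputs (minus tess levels, which live in the tess factor
 * BO instead) linearly: first record the largest size seen for each
 * driver_location, then assign consecutive locations.  For arrayed
 * per-vertex variables, size[] is rewritten to the per-vertex size (total
 * divided by the outer array length); for patch variables it is zeroed,
 * since they don't scale with the vertex count.
 */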
static void
build_primitive_map(nir_shader *shader, nir_variable_mode mode, struct primitive_map *map)
{
	nir_foreach_variable_with_modes (var, shader, mode) {
		switch (var->data.location) {
		case VARYING_SLOT_TESS_LEVEL_OUTER:
		case VARYING_SLOT_TESS_LEVEL_INNER:
			continue;
		}

		unsigned size = glsl_count_attribute_slots(var->type, false) * 4;

		assert(var->data.driver_location < ARRAY_SIZE(map->size));
		map->size[var->data.driver_location] =
			MAX2(map->size[var->data.driver_location], size);
	}

	unsigned loc = 0;
	for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
		if (map->size[i] == 0)
			continue;
		nir_variable *var = get_var(shader, mode, i);
		map->loc[i] = loc;
		loc += map->size[i];

		if (var->data.patch)
			map->size[i] = 0;
		else
			map->size[i] = map->size[i] / glsl_get_length(var->type);
	}

	map->stride = loc;
}

static void
lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			/* nir_lower_io_to_temporaries replaces all access to output
			 * variables with temp variables and then emits a nir_copy_var at
			 * the end of the shader.  Thus, we should always get a full wrmask
			 * here.
			 */
			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

			b->cursor = nir_instr_remove(&intr->instr);

			nir_ssa_def *vertex_id = build_vertex_id(b, state);
			nir_ssa_def *offset = build_local_offset(b, state, vertex_id, nir_intrinsic_base(intr),
					intr->src[1].ssa);
			nir_intrinsic_instr *store =
				nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);

			store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
			store->src[1] = nir_src_for_ssa(offset);
			store->num_components = intr->num_components;

			nir_builder_instr_insert(b, &store->instr);
			break;
		}

		default:
			break;
		}
	}
}

static nir_ssa_def *
local_thread_id(nir_builder *b)
{
	return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}

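/* Lower store_output in a producer stage (VS feeding tess, or the stage
 * feeding GS) into explicit shared-memory stores using the layout
 * computed above; the consumer reads them back via
 * ir3_nir_lower_to_explicit_input().
 */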
void
ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
		unsigned topology)
{
	struct state state = { };

	build_primitive_map(shader, nir_var_shader_out, &state.map);
	memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
		state.header = nir_load_tcs_header_ir3(&b);
	else
		state.header = nir_load_gs_header_ir3(&b);

	nir_foreach_block_safe (block, impl)
		lower_block_to_explicit_output(block, &b, &state);

	nir_metadata_preserve(impl, nir_metadata_block_index |
			nir_metadata_dominance);

	v->output_size = state.map.stride;
}


static void
lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *offset = build_local_offset(b, state,
					intr->src[0].ssa, // this is typically gl_InvocationID
					nir_intrinsic_base(intr),
					intr->src[1].ssa);

			replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
			break;
		}

		case nir_intrinsic_load_invocation_id: {
			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *iid = build_invocation_id(b, state);
			nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
			nir_instr_remove(&intr->instr);
			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
{
	struct state state = { };

	/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
	 * HS uses a different primitive id, which starts at bit 16 in the header
	 */
	if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
		state.local_primitive_id_start = 16;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	if (shader->info.stage == MESA_SHADER_GEOMETRY)
		state.header = nir_load_gs_header_ir3(&b);
	else
		state.header = nir_load_tcs_header_ir3(&b);

	nir_foreach_block_safe (block, impl)
		lower_block_to_explicit_input(block, &b, &state);
}


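/* Compute the dword offset into the tess param BO for a vertex attribute:
 *
 *    primitive_id * patch_stride   ; this patch's chunk of the BO
 *  + attr_offset                   ; the attribute's location
 *  + vertex * attr_stride          ; the copy for this vertex
 *  + (offset << 2)                 ; indirect offset, slots -> dwords
 *
 * As with build_local_offset(), the HS knows its own output layout while
 * the DS reads the locations as driver params fixed at HS compile time.
 */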
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
		nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
{
	nir_ssa_def *primitive_id = nir_load_primitive_id(b);
	nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
	nir_ssa_def *attr_offset;
	int loc = var->data.driver_location;

	switch (b->shader->info.stage) {
	case MESA_SHADER_TESS_CTRL:
		attr_offset = nir_imm_int(b, state->map.loc[loc]);
		break;
	case MESA_SHADER_TESS_EVAL:
		attr_offset = nir_load_primitive_location_ir3(b, loc);
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
	nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);

	return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
			nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
}

static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
{
	debug_assert(var && var->data.patch);

	return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}

static void
tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)
{
	switch (state->topology) {
	case IR3_TESS_TRIANGLES:
		*inner = 1;
		*outer = 3;
		break;
	case IR3_TESS_QUADS:
		*inner = 2;
		*outer = 4;
		break;
	case IR3_TESS_ISOLINES:
		*inner = 0;
		*outer = 2;
		break;
	default:
		unreachable("bad");
	}
}

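/* The tess factor BO appears to hold one fixed-size record per patch: a
 * header dword, then the outer levels, then the inner levels, with the
 * level counts depending on the topology (see tess_level_components()).
 * This returns the dword offset of the requested level slot for the
 * current patch.
 */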
static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
	uint32_t inner_levels, outer_levels;
	tess_level_components(state, &inner_levels, &outer_levels);

	const uint32_t patch_stride = 1 + inner_levels + outer_levels;

	nir_ssa_def *primitive_id = nir_load_primitive_id(b);

	nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

	uint32_t offset;
	switch (slot) {
	case VARYING_SLOT_TESS_LEVEL_OUTER:
		/* There's some kind of header dword, tess levels start at index 1. */
		offset = 1;
		break;
	case VARYING_SLOT_TESS_LEVEL_INNER:
		offset = 1 + outer_levels;
		break;
	default:
		unreachable("bad");
	}

	return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}

static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_control_barrier:
		case nir_intrinsic_memory_barrier_tcs_patch:
			/* Hull shaders dispatch 32 wide so an entire patch will always
			 * fit in a single warp and execute in lock-step.  Consequently,
			 * we don't need to do anything for TCS barriers so just remove
			 * the intrinsic.  Otherwise we'd emit an actual barrier
			 * instruction, which would deadlock.
			 */
			nir_instr_remove(&intr->instr);
			break;

		case nir_intrinsic_load_per_vertex_output: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[0].ssa, intr->src[1].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_store_per_vertex_output: {
			// src[] = { value, vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			/* sparse writemask not supported */
			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

			nir_ssa_def *value = intr->src[0].ssa;
			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[1].ssa, intr->src[2].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
					nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));

			break;
		}

		case nir_intrinsic_load_output: {
			// src[] = { offset }.

			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address, *offset;

			/* note if vectorization of the tess level loads ever happens:
			 * "ldg" across 16-byte boundaries can behave incorrectly if results
			 * are never used. most likely some issue with (sy) not properly
			 * syncing with values coming from a second memory transaction.
			 */
			if (is_tess_levels(var)) {
				assert(intr->dest.ssa.num_components == 1);
				address = nir_load_tess_factor_base_ir3(b);
				offset = build_tessfactor_base(b, var->data.location, state);
			} else {
				address = nir_load_tess_param_base_ir3(b);
				offset = build_patch_offset(b, state, intr->src[0].ssa, var);
			}

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_store_output: {
			// src[] = { value, offset }.

			/* write patch output to bo */

			nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));

			b->cursor = nir_before_instr(&intr->instr);

			/* sparse writemask not supported */
			assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

			if (is_tess_levels(var)) {
				/* Tess levels are defined as float[4] and float[2], but the
				 * tess factor BO has smaller sizes for tris/isolines, so we
				 * have to discard any writes beyond the number of components
				 * for inner/outer levels.
				 */
				uint32_t inner_levels, outer_levels, levels;
				tess_level_components(state, &inner_levels, &outer_levels);

				if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
					levels = outer_levels;
				else
					levels = inner_levels;

				assert(intr->src[0].ssa->num_components == 1);

				nir_ssa_def *offset =
					nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));

				nir_if *nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));

				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
						intr->src[0].ssa,
						nir_load_tess_factor_base_ir3(b),
						nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));

				nir_pop_if(b, nif);
			} else {
				nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
				nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);

				debug_assert(nir_intrinsic_component(intr) == 0);

				replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
						intr->src[0].ssa, address, offset);
			}
			break;
		}

		default:
			break;
		}
	}
}

static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
	/* Insert endpatch instruction:
	 *
	 * TODO we should re-work this to use normal flow control.
	 */

	nir_intrinsic_instr *end_patch =
		nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
	nir_builder_instr_insert(b, &end_patch->instr);
}

void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
		unsigned topology)
{
	struct state state = { .topology = topology };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
				_mesa_shader_stage_to_string(shader->info.stage));
		nir_print_shader(shader, stderr);
	}

	build_primitive_map(shader, nir_var_shader_out, &state.map);
	memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
	v->output_size = state.map.stride;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	state.header = nir_load_tcs_header_ir3(&b);

	nir_foreach_block_safe (block, impl)
		lower_tess_ctrl_block(block, &b, &state);

	/* Now move the body of the TCS into a conditional:
	 *
	 *   if (gl_InvocationID < num_vertices)
	 *     // body
	 *
	 */

	nir_cf_list body;
	nir_cf_extract(&body, nir_before_cf_list(&impl->body),
				   nir_after_cf_list(&impl->body));

	b.cursor = nir_after_cf_list(&impl->body);

	/* Re-emit the header, since the old one got moved into the if branch */
	state.header = nir_load_tcs_header_ir3(&b);
	nir_ssa_def *iid = build_invocation_id(&b, &state);

	const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
	nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

	nir_if *nif = nir_push_if(&b, cond);

	nir_cf_reinsert(&body, b.cursor);

	b.cursor = nir_after_cf_list(&nif->then_list);

	/* Insert conditional exit for threads with invocation id != 0 */
	nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
	nir_intrinsic_instr *cond_end =
		nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
	cond_end->src[0] = nir_src_for_ssa(iid0_cond);
	nir_builder_instr_insert(&b, &cond_end->instr);

	emit_tess_epilogue(&b, &state);

	nir_pop_if(&b, nif);

	nir_metadata_preserve(impl, 0);
}


static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_load_tess_coord: {
			b->cursor = nir_after_instr(&intr->instr);
			nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
			nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
			nir_ssa_def *z;

			if (state->topology == IR3_TESS_TRIANGLES)
				z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
			else
				z = nir_imm_float(b, 0.0f);

			nir_ssa_def *coord = nir_vec3(b, x, y, z);

			nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
					nir_src_for_ssa(coord),
					b->cursor.instr);
			break;
		}

		case nir_intrinsic_load_per_vertex_input: {
			// src[] = { vertex, offset }.

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
			nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
			nir_ssa_def *offset = build_per_vertex_offset(b, state,
					intr->src[0].ssa, intr->src[1].ssa, var);

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		case nir_intrinsic_load_input: {
			// src[] = { offset }.

			nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));

			debug_assert(var->data.patch);

			b->cursor = nir_before_instr(&intr->instr);

			nir_ssa_def *address, *offset;

			/* note if vectorization of the tess level loads ever happens:
			 * "ldg" across 16-byte boundaries can behave incorrectly if results
			 * are never used. most likely some issue with (sy) not properly
			 * syncing with values coming from a second memory transaction.
			 */
			if (is_tess_levels(var)) {
				assert(intr->dest.ssa.num_components == 1);
				address = nir_load_tess_factor_base_ir3(b);
				offset = build_tessfactor_base(b, var->data.location, state);
			} else {
				address = nir_load_tess_param_base_ir3(b);
				offset = build_patch_offset(b, state, intr->src[0].ssa, var);
			}

			offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));

			replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
{
	struct state state = { .topology = topology };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
				_mesa_shader_stage_to_string(shader->info.stage));
		nir_print_shader(shader, stderr);
	}

	/* Build map of inputs so we have the sizes. */
	build_primitive_map(shader, nir_var_shader_in, &state.map);

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);

	nir_foreach_block_safe (block, impl)
		lower_tess_eval_block(block, &b, &state);

	nir_metadata_preserve(impl, 0);
}

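/* Lower EmitVertex()/EndPrimitive().  EndPrimitive becomes a store of
 * what is presumably the primitive-end flag (4) to vertex_flags for the
 * next vertex.  EmitVertex snapshots the shadowed outputs into the emit
 * temporaries, but only on the invocation whose local thread id equals
 * the running vertex count, so each invocation ends up exporting the one
 * vertex whose index matches its thread id.
 */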
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
	nir_foreach_instr_safe (instr, block) {
		if (instr->type != nir_instr_type_intrinsic)
			continue;

		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

		switch (intr->intrinsic) {
		case nir_intrinsic_end_primitive: {
			b->cursor = nir_before_instr(&intr->instr);
			nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
			nir_instr_remove(&intr->instr);
			break;
		}

		case nir_intrinsic_emit_vertex: {
			/* Load the vertex count */
			b->cursor = nir_before_instr(&intr->instr);
			nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

			nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

			foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
				nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
				nir_variable *src = exec_node_data(nir_variable, src_node, node);
				nir_copy_var(b, dest, src);
			}

			nir_instr_remove(&intr->instr);

			nir_store_var(b, state->emitted_vertex_var,
					nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);

			nir_pop_if(b, NULL);

			/* Increment the vertex count by 1 */
			nir_store_var(b, state->vertex_count_var,
					nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
			nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);

			break;
		}

		default:
			break;
		}
	}
}

void
ir3_nir_lower_gs(nir_shader *shader)
{
	struct state state = { };

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (before gs lowering):\n");
		nir_print_shader(shader, stderr);
	}

	build_primitive_map(shader, nir_var_shader_in, &state.map);

	/* Create an output var for vertex_flags. This will be shadowed below,
	 * same way regular outputs get shadowed, and this variable will become a
	 * temporary.
	 */
	state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
			glsl_uint_type(), "vertex_flags");
	state.vertex_flags_out->data.driver_location = shader->num_outputs++;
	state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
	state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;

	nir_function_impl *impl = nir_shader_get_entrypoint(shader);
	assert(impl);

	nir_builder b;
	nir_builder_init(&b, impl);
	b.cursor = nir_before_cf_list(&impl->body);

	state.header = nir_load_gs_header_ir3(&b);

	/* Generate two sets of shadow vars for the output variables.  The first
	 * set replaces the real outputs and the second set (emit_outputs) we'll
	 * assign in the emit_vertex conditionals.  Then at the end of the shader
	 * we copy the emit_outputs to the real outputs, so that we get
	 * store_output in uniform control flow.
	 */
	exec_list_make_empty(&state.old_outputs);
	nir_foreach_shader_out_variable_safe(var, shader) {
		exec_node_remove(&var->node);
		exec_list_push_tail(&state.old_outputs, &var->node);
	}
	exec_list_make_empty(&state.new_outputs);
	exec_list_make_empty(&state.emit_outputs);
	nir_foreach_variable_in_list(var, &state.old_outputs) {
		/* Create a new output var by cloning the original output var and
		 * stealing the name.
		 */
		nir_variable *output = nir_variable_clone(var, shader);
		exec_list_push_tail(&state.new_outputs, &output->node);

		/* Rewrite the original output to be a shadow variable. */
		var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
		var->data.mode = nir_var_shader_temp;

		/* Clone the shadow variable to create the emit shadow variable that
		 * we'll assign in the emit conditionals.
		 */
		nir_variable *emit_output = nir_variable_clone(var, shader);
		emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
		exec_list_push_tail(&state.emit_outputs, &emit_output->node);
	}

	/* During the shader we'll keep track of which vertex we're currently
	 * emitting for the EmitVertex test and how many vertices we emitted, so
	 * we know to discard if we didn't emit any.  In most simple shaders,
	 * this can all be statically determined and gets optimized away.
	 */
	state.vertex_count_var =
		nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
	state.emitted_vertex_var =
		nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");

	/* Initialize to 0. */
	b.cursor = nir_before_cf_list(&impl->body);
	nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
	nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
	nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);

	nir_foreach_block_safe (block, impl)
		lower_gs_block(block, &b, &state);

	set_foreach(impl->end_block->predecessors, block_entry) {
		struct nir_block *block = (void *)block_entry->key;
		b.cursor = nir_after_block_before_jump(block);

		nir_intrinsic_instr *discard_if =
			nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);

		nir_ssa_def *cond = nir_ieq(&b, nir_load_var(&b, state.emitted_vertex_var), nir_imm_int(&b, 0));

		discard_if->src[0] = nir_src_for_ssa(cond);

		nir_builder_instr_insert(&b, &discard_if->instr);

		foreach_two_lists(dest_node, &state.new_outputs, src_node, &state.emit_outputs) {
			nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
			nir_variable *src = exec_node_data(nir_variable, src_node, node);
			nir_copy_var(&b, dest, src);
		}
	}

	exec_list_append(&shader->variables, &state.old_outputs);
	exec_list_append(&shader->variables, &state.emit_outputs);
	exec_list_append(&shader->variables, &state.new_outputs);

	nir_metadata_preserve(impl, 0);

	nir_lower_global_vars_to_local(shader);
	nir_split_var_copies(shader);
	nir_lower_var_copies(shader);

	nir_fixup_deref_modes(shader);

	if (shader_debug_enabled(shader->info.stage)) {
		fprintf(stderr, "NIR (after gs lowering):\n");
		nir_print_shader(shader, stderr);
	}
}

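/* For each consumer input, record in locs[] the matching producer output
 * offset (scaled to the units the consumer's load instruction expects)
 * and return the number of input locations used.
 */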
uint32_t
ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
		const struct ir3_shader_variant *consumer,
		uint32_t *locs)
{
	uint32_t num_loc = 0, factor;

	switch (consumer->type) {
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_GEOMETRY:
		/* These stages load with ldlw, which expects byte offsets. */
		factor = 4;
		break;
	case MESA_SHADER_TESS_EVAL:
		/* The tess eval shader uses ldg, which takes dword offsets. */
		factor = 1;
		break;
	default:
		unreachable("bad shader stage");
	}

	nir_foreach_shader_in_variable(in_var, consumer->shader->nir) {
		nir_foreach_shader_out_variable(out_var, producer->shader->nir) {
			if (in_var->data.location == out_var->data.location) {
				locs[in_var->data.driver_location] =
					producer->output_loc[out_var->data.driver_location] * factor;

				debug_assert(num_loc <= in_var->data.driver_location + 1);
				num_loc = in_var->data.driver_location + 1;
			}
		}
	}

	return num_loc;
}