/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"
#include "nir_builder.h"

#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "sfn_instruction_tex.h"

#include "sfn_shader_vertex.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_shader_compute.h"
#include "sfn_shader_tcs.h"
#include "sfn_shader_tess_eval.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_ir_to_assembly.h"

#include <vector>

namespace r600 {

using std::vector;

ShaderFromNir::ShaderFromNir():sh(nullptr),
   m_current_if_id(0),
   m_current_loop_id(0)
{
}

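/* Translate a NIR shader into R600 IR: pick the stage-specific
 * implementation, scan all instructions, allocate registers, and then walk
 * the control-flow graph to emit the IR. */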
bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
                          r600_pipe_shader_selector *sel, r600_shader_key& key,
                          struct r600_shader* gs_shader, enum chip_class _chip_class)
{
   sh = shader;
   chip_class = _chip_class;
   assert(sh);

   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_TESS_CTRL:
      sfn_log << SfnLog::trans << "Start TCS\n";
      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_TESS_EVAL:
      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_GEOMETRY:
      sfn_log << SfnLog::trans << "Start GS\n";
      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_FRAGMENT:
      sfn_log << SfnLog::trans << "Start FS\n";
      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_COMPUTE:
      sfn_log << SfnLog::trans << "Start CS\n";
      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   default:
      return false;
   }

   sfn_log << SfnLog::trans << "Process declarations\n";
   if (!process_declaration())
      return false;

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));

   sfn_log << SfnLog::trans << "Scan shader\n";
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!impl->scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   sfn_log << SfnLog::trans << "Reserve registers\n";
   if (!impl->allocate_reserved_registers()) {
      return false;
   }

   ValuePool::array_list arrays;
   sfn_log << SfnLog::trans << "Allocate local registers\n";
   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
      impl->allocate_local_register(*reg, arrays);
   }

   sfn_log << SfnLog::trans << "Emit shader start\n";
   impl->allocate_arrays(arrays);

   impl->emit_shader_start();

   sfn_log << SfnLog::trans << "Process shader\n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   // Add optimizations here
   sfn_log << SfnLog::trans << "Finalize\n";
   impl->finalize();

   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
      sfn_log << SfnLog::trans << "Merge registers\n";
      impl->remap_registers();
   }
   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
   return true;
}

Shader ShaderFromNir::shader() const
{
   return Shader{impl->m_output, impl->get_temp_registers()};
}


bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");
   switch (node->type) {
   case nir_cf_node_block:
      return process_block(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return process_if(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return process_loop(nir_cf_node_as_loop(node));
   default:
      return false;
   }
}

bool ShaderFromNir::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!impl->emit_if_start(m_current_if_id, if_stmt))
      return false;

   int if_id = m_current_if_id++;
   m_if_stack.push(if_id);

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
         if (!process_cf_node(n)) return false;

   if (!if_stmt->else_list.is_empty()) {
      if (!impl->emit_else_start(if_id))
         return false;

      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
            if (!process_cf_node(n)) return false;
   }

   if (!impl->emit_ifelse_end(if_id))
      return false;

   m_if_stack.pop();
   return true;
}

bool ShaderFromNir::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
   int loop_id = m_current_loop_id++;

   if (!impl->emit_loop_start(loop_id))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &node->body)
         if (!process_cf_node(n)) return false;

   if (!impl->emit_loop_end(loop_id))
      return false;

   return true;
}

bool ShaderFromNir::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
   nir_foreach_instr(instr, block) {
      int r = emit_instruction(instr);
      if (!r) {
         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
                 << *instr << "\n";
         return false;
      }
   }
   return true;
}


ShaderFromNir::~ShaderFromNir()
{
}

pipe_shader_type ShaderFromNir::processor_type() const
{
   return impl->m_processor_type;
}

bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
   assert(impl);

   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";

   switch (instr->type) {
   case nir_instr_type_alu:
      return impl->emit_alu_instruction(instr);
   case nir_instr_type_deref:
      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
   case nir_instr_type_intrinsic:
      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const:
      return impl->set_literal_constant(nir_instr_as_load_const(instr));
   case nir_instr_type_tex:
      return impl->emit_tex_instruction(instr);
   case nir_instr_type_jump:
      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
   case nir_instr_type_ssa_undef:
      return impl->create_undef(nir_instr_as_ssa_undef(instr));
   default:
      fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
      nir_print_instr(instr, stderr);
      fprintf(stderr, "'\n");
      return false;
   }
}

bool ShaderFromNir::process_declaration()
{
   // scan input declarations
   nir_foreach_shader_in_variable(variable, sh) {
      if (!impl->process_inputs(variable)) {
         fprintf(stderr, "R600: error parsing input variable %s\n", variable->name);
         return false;
      }
   }

   // scan output declarations
   nir_foreach_shader_out_variable(variable, sh) {
      if (!impl->process_outputs(variable)) {
         fprintf(stderr, "R600: error parsing output variable %s\n", variable->name);
         return false;
      }
   }

   // scan uniform, UBO, and SSBO declarations
   nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
                                                 nir_var_mem_ubo |
                                                 nir_var_mem_ssbo) {
      if (!impl->process_uniforms(variable)) {
         fprintf(stderr, "R600: error parsing uniform variable %s\n", variable->name);
         return false;
      }
   }

   return true;
}

const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
{
   assert(impl);
   return impl->m_output;
}


AssemblyFromShader::~AssemblyFromShader()
{
}

bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
{
   return do_lower(ir);
}

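/* Lower nir_op_(un)pack_half_2x16 to the *_split variants, which handle
 * one 16-bit half at a time and therefore map better to the r600 ALU
 * instructions. */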
static nir_ssa_def *
r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   switch (alu->op) {
   case nir_op_unpack_half_2x16: {
      nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
      return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
                      nir_unpack_half_2x16_split_y(b, packed));
   }
   case nir_op_pack_half_2x16: {
      nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
      return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
                                      nir_channel(b, src_vec2, 1));
   }
   default:
      return nullptr;
   }
}

bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
{
   return instr->type == nir_instr_type_alu;
}

bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_pack_unpack_2x16_filter,
                                        r600_nir_lower_pack_unpack_2x16_impl,
                                        nullptr);
}

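/* Rewrite the address source of load/store_scratch: the backend expects an
 * index rather than a byte offset, so the address is right-shifted by an
 * amount derived from the number of components accessed. */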
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->dest.ssa.num_components;
   }

   nir_ssa_def *address = instr->src[address_index].ssa;
   nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));

   nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
                         nir_src_for_ssa(new_address));
}

bool r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

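/* Lower load_ubo to the r600-specific load_ubo_r600, which fetches an
 * aligned 16-byte block. With a constant buffer index and offset the
 * backend can resolve the component selection itself; otherwise the offset
 * is divided by 16 here and the requested components are picked from the
 * fetched vec4 with a bcsel cascade. */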
static nir_ssa_def *
r600_lower_ubo_to_align16_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   assert(op->intrinsic == nir_intrinsic_load_ubo);

   bool const_address = (nir_src_is_const(op->src[1]) && nir_src_is_const(op->src[0]));

   nir_ssa_def *offset = op->src[1].ssa;

   /* This is ugly: With const addressing we can actually set a proper fetch target mask,
    * but for this we need the component encoded; we don't shift and do the decoding in the
    * backend. Otherwise we shift by four and resolve the component here.
    * (TODO: encode the start component in the intrinsic when the offset base is non-constant
    * but a multiple of 16) */

   nir_ssa_def *new_offset = offset;
   if (!const_address)
      new_offset = nir_ishr(b, offset, nir_imm_int(b, 4));

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo_r600);
   load->num_components = const_address ? op->num_components : 4;
   load->src[0] = op->src[0];
   load->src[1] = nir_src_for_ssa(new_offset);
   nir_intrinsic_set_align(load, nir_intrinsic_align_mul(op), nir_intrinsic_align_offset(op));

   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   nir_builder_instr_insert(b, &load->instr);

   /* when four components are loaded or both the offset and the location
    * are constant, then the backend can deal with it better */
   if (op->num_components == 4 || const_address)
      return &load->dest.ssa;

   /* What comes below is a performance disaster when the offset is not constant
    * because then we have to assume that any component can be the first one and we
    * have to pick the result manually. */
   nir_ssa_def *first_comp = nir_iand(b, nir_ishr(b, offset, nir_imm_int(b, 2)),
                                      nir_imm_int(b, 3));

   const unsigned swz_000[3] = {0, 0, 0};
   nir_ssa_def *component_select = nir_ieq(b, r600_imm_ivec3(b, 0, 1, 2),
                                           nir_swizzle(b, first_comp, swz_000, 3));

   if (op->num_components == 1) {
      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_channel(b, &load->dest.ssa, 0),
                                      nir_channel(b, &load->dest.ssa, 3));
      nir_ssa_def *check1 = nir_bcsel(b, nir_channel(b, component_select, 1),
                                      nir_channel(b, &load->dest.ssa, 1),
                                      check0);
      return nir_bcsel(b, nir_channel(b, component_select, 2),
                       nir_channel(b, &load->dest.ssa, 2),
                       check1);
   } else if (op->num_components == 2) {
      const unsigned swz_01[2] = {0, 1};
      const unsigned swz_12[2] = {1, 2};
      const unsigned swz_23[2] = {2, 3};

      nir_ssa_def *check0 = nir_bcsel(b, nir_channel(b, component_select, 0),
                                      nir_swizzle(b, &load->dest.ssa, swz_01, 2),
                                      nir_swizzle(b, &load->dest.ssa, swz_23, 2));
      return nir_bcsel(b, nir_channel(b, component_select, 1),
                       nir_swizzle(b, &load->dest.ssa, swz_12, 2),
                       check0);
   } else {
      const unsigned swz_012[3] = {0, 1, 2};
      const unsigned swz_123[3] = {1, 2, 3};
      return nir_bcsel(b, nir_channel(b, component_select, 0),
                       nir_swizzle(b, &load->dest.ssa, swz_012, 3),
                       nir_swizzle(b, &load->dest.ssa, swz_123, 3));
   }
}

bool r600_lower_ubo_to_align16_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
   return op->intrinsic == nir_intrinsic_load_ubo;
}


bool r600_lower_ubo_to_align16(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_ubo_to_align16_filter,
                                        r600_lower_ubo_to_align16_impl,
                                        nullptr);
}

}

using r600::r600_nir_lower_int_tg4;
using r600::r600_nir_lower_pack_unpack_2x16;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;

int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

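/* Size/align callback used for nir_lower_vars_to_scratch below. Despite the
 * name, the values are counted in array elements rather than bytes. */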
void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size, unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array,
                                        &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

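/* Lower load/store_shared to the r600-specific LDS intrinsics: a load gets
 * one 32-bit address per component (e.g. a vec4 load of address A becomes a
 * load with addresses (A, A+4, A+8, A+12)), and a store is split into at
 * most two stores of up to two components each, selected by the write
 * mask. */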
static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}

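/* Run one round of the generic NIR cleanup passes; returns true if any pass
 * made progress, so the caller can iterate this to a fixed point. */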
static bool
optimize_once(nir_shader *shader)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   NIR_PASS(progress, shader, nir_opt_vectorize);

   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   return progress;
}

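/* Check whether any ALU instruction uses the saturate modifier; such
 * shaders skip the optimization loop in r600_shader_from_nir below. */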
bool has_saturate(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            auto alu = nir_instr_as_alu(instr);
            if (alu->dest.saturate)
               return true;
         }
      }
   }
   return false;
}

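/* Driver entry point: run the NIR lowering and optimization pipeline,
 * translate the result to R600 IR, and hand that to the assembly
 * backend. */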
int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
{
   char filename[4000];
   struct r600_pipe_shader_selector *sel = pipeshader->selector;

   r600::ShaderFromNir convert;

   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR------------------------------------------\n");
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
   }

   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);

   NIR_PASS_V(sel->nir, r600_lower_shared_io);

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0u,
   };
   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);

   NIR_PASS_V(sel->nir, r600_nir_lower_int_tg4);
   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

   NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_out, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);
      NIR_PASS_V(sel->nir, r600_lower_tess_io, (pipe_prim_type)key->tcs.prim_mode);
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       sel->nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, r600_glsl_type_size,
                 nir_lower_io_lower_64bit_to_32);
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL ||
       sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
       (sel->nir->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sel->nir->info.stage == MESA_SHADER_TESS_CTRL ?
                          key->tcs.prim_mode : sel->nir->info.tess.primitive_mode;
      NIR_PASS_V(sel->nir, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
   }

   if (sel->nir->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sel->nir, r600_append_tcs_TF_emission,
                 (pipe_prim_type)key->tcs.prim_mode);

   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sel->nir->functions));
   bool optimize = func->impl->registers.length() == 0 && !has_saturate(func);

   if (optimize) {
      optimize_once(sel->nir);
      NIR_PASS_V(sel->nir, r600_lower_ubo_to_align16);
   }
   /* It seems the output of this optimization is cached somewhere, and
    * when there are registers we can no longer copy-propagate, so skip
    * the optimization in that case. (There is probably a better way, but
    * for now this works.)
    */
   if (optimize)
      while (optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sel->nir, nir_remove_dead_variables, nir_var_shader_out, NULL);

   NIR_PASS_V(sel->nir, nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize && optimize_once(sel->nir));

   NIR_PASS_V(sel->nir, nir_lower_locals_to_regs);
   //NIR_PASS_V(sel->nir, nir_opt_algebraic);
   //NIR_PASS_V(sel->nir, nir_copy_prop);
   NIR_PASS_V(sel->nir, nir_lower_to_source_mods, nir_lower_float_source_mods);
   NIR_PASS_V(sel->nir, nir_convert_from_ssa, true);
   NIR_PASS_V(sel->nir, nir_opt_dce);

   if ((rctx->screen->b.debug_flags & DBG_NIR) &&
       (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      fprintf(stderr, "-- NIR --------------------------------------------------------\n");
      struct nir_function *func = (struct nir_function *)exec_list_get_head(&sel->nir->functions);
      nir_index_ssa_defs(func->impl);
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "-- END --------------------------------------------------------\n");
   }

   memset(&pipeshader->shader, 0, sizeof(r600_shader));
   pipeshader->scratch_space_needed = sel->nir->scratch_size;

   if (sel->nir->info.stage == MESA_SHADER_TESS_EVAL ||
       sel->nir->info.stage == MESA_SHADER_VERTEX ||
       sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
      pipeshader->shader.clip_dist_write |= ((1 << sel->nir->info.clip_distance_array_size) - 1);
      pipeshader->shader.cull_dist_write = ((1 << sel->nir->info.cull_distance_array_size) - 1)
                                           << sel->nir->info.clip_distance_array_size;
      pipeshader->shader.cc_dist_mask = (1 << (sel->nir->info.cull_distance_array_size +
                                               sel->nir->info.clip_distance_array_size)) - 1;
   }

   struct r600_shader* gs_shader = nullptr;
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

   bool r = convert.lower(sel->nir, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
   if (!r || (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      static int shnr = 0;

      snprintf(filename, 4000, "nir-%s_%d.inc", sel->nir->info.name, shnr++);

      if (access(filename, F_OK) == -1) {
         FILE *f = fopen(filename, "w");

         if (f) {
            fprintf(f, "const char *shader_blob_%s =\nR\"(", sel->nir->info.name);
            nir_print_shader(sel->nir, f);
            fprintf(f, ")\";\n");
            fclose(f);
         }
      }
      if (!r)
         return -2;
   }

   auto shader = convert.shader();

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);

   r600::sfn_log << r600::SfnLog::shader_info
                 << "pipeshader->shader.processor_type = "
                 << pipeshader->shader.processor_type << "\n";

   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
   if (!afs.lower(shader.m_ir)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);
      return -1;
   }

   if (sel->nir->info.stage == MESA_SHADER_GEOMETRY) {
      r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
      generate_gs_copy_shader(rctx, pipeshader, &sel->so);
      assert(pipeshader->gs_copy_shader);
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
   }

   return 0;
}