/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir.h"
#include "nir_builder.h"

#include "../r600_pipe.h"
#include "../r600_shader.h"

#include "sfn_instruction_tex.h"

#include "sfn_shader_vertex.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_shader_compute.h"
#include "sfn_shader_tcs.h"
#include "sfn_shader_tess_eval.h"
#include "sfn_nir_lower_fs_out_to_vector.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir_lower_alu.h"

#include <vector>

namespace r600 {

using std::vector;

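/* NirLowerInstruction adapts nir_shader_lower_instructions(), which expects
 * plain C callbacks, to a C++ interface: the static trampolines below recover
 * the instance from the callback data pointer and forward to its filter() and
 * lower() methods (declared in sfn_nir.h). An illustrative sketch of a
 * subclass (LowerFoo is a made-up name, not part of this driver):
 *
 *    class LowerFoo : public NirLowerInstruction {
 *       bool filter(const nir_instr *instr) const override {
 *          return instr->type == nir_instr_type_alu;
 *       }
 *       nir_ssa_def *lower(nir_instr *instr) override {
 *          // 'b' was set by the trampoline before this call
 *          return nir_imm_int(b, 0);
 *       }
 *    };
 */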
NirLowerInstruction::NirLowerInstruction():
   b(nullptr)
{
}

bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
{
   auto me = reinterpret_cast<const NirLowerInstruction*>(data);
   return me->filter(instr);
}

nir_ssa_def *NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data)
{
   auto me = reinterpret_cast<NirLowerInstruction*>(data);
   me->set_builder(b);
   return me->lower(instr);
}

bool NirLowerInstruction::run(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        filter_instr,
                                        lower_instr,
                                        (void *)this);
}


ShaderFromNir::ShaderFromNir(): sh(nullptr),
   chip_class(CLASS_UNKNOWN),
   m_current_if_id(0),
   m_current_loop_id(0),
   scratch_size(0)
{
}

bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
                          r600_pipe_shader_selector *sel, r600_shader_key& key,
                          struct r600_shader* gs_shader, enum chip_class _chip_class)
{
   sh = shader;
   chip_class = _chip_class;
   assert(sh);

   switch (shader->info.stage) {
   case MESA_SHADER_VERTEX:
      impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_TESS_CTRL:
      sfn_log << SfnLog::trans << "Start TCS\n";
      impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_TESS_EVAL:
      sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
      impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
      break;
   case MESA_SHADER_GEOMETRY:
      sfn_log << SfnLog::trans << "Start GS\n";
      impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_FRAGMENT:
      sfn_log << SfnLog::trans << "Start FS\n";
      impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
      break;
   case MESA_SHADER_COMPUTE:
      sfn_log << SfnLog::trans << "Start CS\n";
      impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
      break;
   default:
      return false;
   }

   sfn_log << SfnLog::trans << "Process declarations\n";
   if (!process_declaration())
      return false;

   // at this point all functions should be inlined
   const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));

   sfn_log << SfnLog::trans << "Scan shader\n";

   if (sfn_log.has_debug_flag(SfnLog::instr))
      nir_print_shader(const_cast<nir_shader *>(shader), stderr);

   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (!impl->scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   sfn_log << SfnLog::trans << "Reserve registers\n";
   if (!impl->allocate_reserved_registers()) {
      return false;
   }

   ValuePool::array_list arrays;
   sfn_log << SfnLog::trans << "Allocate local registers\n";
   foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
      impl->allocate_local_register(*reg, arrays);
   }

   sfn_log << SfnLog::trans << "Emit shader start\n";
   impl->allocate_arrays(arrays);

   impl->emit_shader_start();

   sfn_log << SfnLog::trans << "Process shader\n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
      if (!process_cf_node(node))
         return false;
   }

   // Add optimizations here
   sfn_log << SfnLog::trans << "Finalize\n";
   impl->finalize();

   impl->get_array_info(pipe_shader->shader);

   if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
      sfn_log << SfnLog::trans << "Merge registers\n";
      impl->remap_registers();
   }

   sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
   return true;
}

Shader ShaderFromNir::shader() const
{
   return Shader{impl->m_output, impl->get_temp_registers()};
}


bool ShaderFromNir::process_cf_node(nir_cf_node *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "CF");
   switch (node->type) {
   case nir_cf_node_block:
      return process_block(nir_cf_node_as_block(node));
   case nir_cf_node_if:
      return process_if(nir_cf_node_as_if(node));
   case nir_cf_node_loop:
      return process_loop(nir_cf_node_as_loop(node));
   default:
      return false;
   }
}

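/* Each if/else construct gets its own id so that emit_if_start(),
 * emit_else_start() and emit_ifelse_end() can be matched up again by the
 * backend; m_if_stack keeps track of the ids of the currently open ifs
 * while nested control flow is processed. */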
bool ShaderFromNir::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   if (!impl->emit_if_start(m_current_if_id, if_stmt))
      return false;

   int if_id = m_current_if_id++;
   m_if_stack.push(if_id);

   foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
         if (!process_cf_node(n)) return false;

   if (!if_stmt->else_list.is_empty()) {
      if (!impl->emit_else_start(if_id))
         return false;

      foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
            if (!process_cf_node(n)) return false;
   }

   if (!impl->emit_ifelse_end(if_id))
      return false;

   m_if_stack.pop();
   return true;
}

bool ShaderFromNir::process_loop(nir_loop *node)
{
   SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
   int loop_id = m_current_loop_id++;

   if (!impl->emit_loop_start(loop_id))
      return false;

   foreach_list_typed(nir_cf_node, n, node, &node->body)
         if (!process_cf_node(n)) return false;

   if (!impl->emit_loop_end(loop_id))
      return false;

   return true;
}

bool ShaderFromNir::process_block(nir_block *block)
{
   SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
   nir_foreach_instr(instr, block) {
      bool r = emit_instruction(instr);
      if (!r) {
         sfn_log << SfnLog::err << "R600: Unsupported instruction: "
                 << *instr << "\n";
         return false;
      }
   }
   return true;
}


ShaderFromNir::~ShaderFromNir()
{
}

pipe_shader_type ShaderFromNir::processor_type() const
{
   return impl->m_processor_type;
}


bool ShaderFromNir::emit_instruction(nir_instr *instr)
{
   assert(impl);

   sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";

   switch (instr->type) {
   case nir_instr_type_alu:
      return impl->emit_alu_instruction(instr);
   case nir_instr_type_deref:
      return impl->emit_deref_instruction(nir_instr_as_deref(instr));
   case nir_instr_type_intrinsic:
      return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
   case nir_instr_type_load_const: /* const values are loaded when needed */
      return true;
   case nir_instr_type_tex:
      return impl->emit_tex_instruction(instr);
   case nir_instr_type_jump:
      return impl->emit_jump_instruction(nir_instr_as_jump(instr));
   case nir_instr_type_ssa_undef:
      return impl->create_undef(nir_instr_as_ssa_undef(instr));
   default:
      fprintf(stderr, "R600: %s: ShaderFromNir unsupported instruction: type %d:'", __func__, instr->type);
      nir_print_instr(instr, stderr);
      fprintf(stderr, "'\n");
      return false;
   }
}

bool ShaderFromNir::process_declaration()
{
   impl->set_shader_info(sh);

   if (!impl->scan_inputs_read(sh))
      return false;

   // scan declarations
   nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
                                                 nir_var_mem_ubo |
                                                 nir_var_mem_ssbo) {
      if (!impl->process_uniforms(variable)) {
         fprintf(stderr, "R600: error parsing uniform variable %s\n", variable->name);
         return false;
      }
   }

   return true;
}

const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
{
   assert(impl);
   return impl->m_output;
}


AssemblyFromShader::~AssemblyFromShader()
{
}

bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
{
   return do_lower(ir);
}

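/* NIR expresses scratch addresses in bytes, while the r600 backend expects
 * them in units of the accessed element. Rewrite the address operand of
 * load/store_scratch by shifting the byte address right; the shift amount
 * is derived from the number of components of the access. */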
static void
r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_before_instr(&instr->instr);

   int address_index = 0;
   int align;

   if (instr->intrinsic == nir_intrinsic_store_scratch) {
      align = instr->src[0].ssa->num_components;
      address_index = 1;
   } else {
      align = instr->dest.ssa.num_components;
   }

   nir_ssa_def *address = instr->src[address_index].ssa;
   nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));

   nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
                         nir_src_for_ssa(new_address));
}

bool r600_lower_scratch_addresses(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_builder build;
      nir_builder_init(&build, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
            if (op->intrinsic != nir_intrinsic_load_scratch &&
                op->intrinsic != nir_intrinsic_store_scratch)
               continue;
            r600_nir_lower_scratch_address_impl(&build, op);
            progress = true;
         }
      }
   }
   return progress;
}

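/* The two helpers below insertion-sort the shader's uniform variables into a
 * stable order, keyed first by binding and then by offset within the binding,
 * so that later passes (e.g. the atomic counter re-indexing) can walk them in
 * a predictable sequence. */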
static void
insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.binding > new_var->data.binding ||
          (var->data.binding == new_var->data.binding &&
           var->data.offset > new_var->data.offset)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

void sort_uniforms(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_uniform_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_uniform_sorted(&new_list, var);
   }
   exec_list_append(&shader->variables, &new_list);
}

static void
insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location ||
          (var->data.location == new_var->data.location &&
           var->data.index > new_var->data.index)) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }

   exec_list_push_tail(var_list, &new_var->node);
}

void sort_fsoutput(nir_shader *shader)
{
   struct exec_list new_list;
   exec_list_make_empty(&new_list);

   nir_foreach_shader_out_variable_safe(var, shader) {
      exec_node_remove(&var->node);
      insert_fsoutput_sorted(&new_list, var);
   }

   unsigned driver_location = 0;
   nir_foreach_variable_in_list(var, &new_list)
      var->data.driver_location = driver_location++;

   exec_list_append(&shader->variables, &new_list);
}

} // namespace r600

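/* Map a deref-based atomic counter intrinsic to its offset-based
 * counterpart; nir_num_intrinsics serves as the "not an atomic counter"
 * sentinel for the caller. */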
static nir_intrinsic_op
r600_map_atomic(nir_intrinsic_op op)
{
   switch (op) {
   case nir_intrinsic_atomic_counter_read_deref:
      return nir_intrinsic_atomic_counter_read;
   case nir_intrinsic_atomic_counter_inc_deref:
      return nir_intrinsic_atomic_counter_inc;
   case nir_intrinsic_atomic_counter_pre_dec_deref:
      return nir_intrinsic_atomic_counter_pre_dec;
   case nir_intrinsic_atomic_counter_post_dec_deref:
      return nir_intrinsic_atomic_counter_post_dec;
   case nir_intrinsic_atomic_counter_add_deref:
      return nir_intrinsic_atomic_counter_add;
   case nir_intrinsic_atomic_counter_min_deref:
      return nir_intrinsic_atomic_counter_min;
   case nir_intrinsic_atomic_counter_max_deref:
      return nir_intrinsic_atomic_counter_max;
   case nir_intrinsic_atomic_counter_and_deref:
      return nir_intrinsic_atomic_counter_and;
   case nir_intrinsic_atomic_counter_or_deref:
      return nir_intrinsic_atomic_counter_or;
   case nir_intrinsic_atomic_counter_xor_deref:
      return nir_intrinsic_atomic_counter_xor;
   case nir_intrinsic_atomic_counter_exchange_deref:
      return nir_intrinsic_atomic_counter_exchange;
   case nir_intrinsic_atomic_counter_comp_swap_deref:
      return nir_intrinsic_atomic_counter_comp_swap;
   default:
      return nir_num_intrinsics;
   }
}

static bool
r600_lower_deref_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
   if (nir_num_intrinsics == op)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.mode != nir_var_uniform &&
       var->data.mode != nir_var_mem_ssbo &&
       var->data.mode != nir_var_mem_shared)
      return false; /* atomics passed as function arguments can't be lowered */

   const unsigned idx = var->data.binding;

   b->cursor = nir_before_instr(&instr->instr);

   nir_ssa_def *offset = nir_imm_int(b, var->data.index);
   for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
        d = nir_deref_instr_parent(d)) {
      assert(d->deref_type == nir_deref_type_array);
      assert(d->arr.index.is_ssa);

      unsigned array_stride = 1;
      if (glsl_type_is_array(d->type))
         array_stride *= glsl_get_aoa_size(d->type);

      offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
                                            nir_imm_int(b, array_stride)));
   }

   /* Since the first source is a deref and the first source in the lowered
    * instruction is the offset, we can just swap it out and change the
    * opcode.
    */
   instr->intrinsic = op;
   nir_instr_rewrite_src(&instr->instr, &instr->src[0],
                         nir_src_for_ssa(offset));
   nir_intrinsic_set_base(instr, idx);

   nir_deref_instr_remove_if_unused(deref);

   return true;
}

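/* Worked example for the re-indexing below: two counters in binding 0 and
 * one counter in binding 1 end up as (binding 0, index 0), (binding 0,
 * index 1) and (binding 1, index 0); r600_lower_deref_instr() then folds
 * the deref chain into this index plus the binding as the base. */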
static bool
r600_nir_lower_atomics(nir_shader *shader)
{
   /* First redo the offsets: in hardware we start at zero for each new
    * binding, and each counter takes one slot within its binding. */
   int current_binding = -1;
   int current_offset = 0;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      if (!var->type->contains_atomic())
         continue;

      if (current_binding == (int)var->data.binding) {
         var->data.index = current_offset;
         current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      } else {
         current_binding = var->data.binding;
         var->data.index = 0;
         current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      }
   }

   return nir_shader_instructions_pass(shader, r600_lower_deref_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
}

using r600::r600_nir_lower_int_tg4;
using r600::r600_lower_scratch_addresses;
using r600::r600_lower_fs_out_to_vector;
using r600::r600_lower_ubo_to_align16;

int
r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
{
   return glsl_count_vec4_slots(type, false, is_bindless);
}

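/* Size/align callback for nir_lower_vars_to_scratch below: sizes are counted
 * in whole elements rather than bytes, so a non-array type costs one unit
 * and an array costs one unit per element, always with alignment 1. */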
void
r600_get_natural_size_align_bytes(const struct glsl_type *type,
                                  unsigned *size, unsigned *align)
{
   if (type->base_type != GLSL_TYPE_ARRAY) {
      *align = 1;
      *size = 1;
   } else {
      unsigned elem_size, elem_align;
      glsl_get_natural_size_align_bytes(type->fields.array,
                                        &elem_size, &elem_align);
      *align = 1;
      *size = type->length;
   }
}

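/* Lower generic shared-memory access to the r600-specific LDS intrinsics.
 * Loads get one address per component, so each channel can be fetched by
 * its own LDS read; stores are split into at most two operations covering
 * the xy and zw component pairs, as selected by the write mask. */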
static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, &load->dest.ssa);
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;
               bool start_even = (writemask & (1u << (2 * i)));

               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);

               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}


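/* After nir_lower_io the fragment position arrives as a
 * load_interpolated_input; the barycentric coordinate source is pointless
 * for VARYING_SLOT_POS, so it is replaced by a plain load_input carrying
 * the same base, component and IO semantics. */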
static nir_ssa_def *
r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   auto old_ir = nir_instr_as_intrinsic(instr);
   auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
   nir_ssa_dest_init(&load->instr, &load->dest,
                     old_ir->dest.ssa.num_components, old_ir->dest.ssa.bit_size, NULL);
   nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir));

   nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir));
   nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir));
   nir_intrinsic_set_dest_type(load, nir_type_float32);
   load->num_components = old_ir->num_components;
   load->src[0] = old_ir->src[1];
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   auto ir = nir_instr_as_intrinsic(instr);
   if (ir->intrinsic != nir_intrinsic_load_interpolated_input)
      return false;

   return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS;
}

/* Strip the interpolator specification; it is not needed for the position
 * input and only gets in the way. */
bool r600_lower_fs_pos_input(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_lower_fs_pos_input_filter,
                                        r600_lower_fs_pos_input_impl,
                                        nullptr);
}

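/* One round of the standard NIR cleanup passes; the callers run this in a
 * loop until no pass reports further progress. Vectorization is optional
 * because the early rounds deliberately operate on scalarized code. */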
static bool
optimize_once(nir_shader *shader, bool vectorize)
{
   bool progress = false;
   NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
   NIR_PASS(progress, shader, nir_copy_prop);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_algebraic);
   NIR_PASS(progress, shader, nir_opt_constant_folding);
   NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
   if (vectorize)
      NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);

   NIR_PASS(progress, shader, nir_opt_remove_phis);

   if (nir_opt_trivial_continues(shader)) {
      progress = true;
      NIR_PASS(progress, shader, nir_copy_prop);
      NIR_PASS(progress, shader, nir_opt_dce);
   }

   NIR_PASS(progress, shader, nir_opt_if, false);
   NIR_PASS(progress, shader, nir_opt_dead_cf);
   NIR_PASS(progress, shader, nir_opt_cse);
   NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);

   NIR_PASS(progress, shader, nir_opt_conditional_discard);
   NIR_PASS(progress, shader, nir_opt_dce);
   NIR_PASS(progress, shader, nir_opt_undef);
   return progress;
}

bool has_saturate(const nir_function *func)
{
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            auto alu = nir_instr_as_alu(instr);
            if (alu->dest.saturate)
               return true;
         }
      }
   }
   return false;
}

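/* Filter for nir_lower_alu_to_scalar: returning false keeps an instruction
 * in vector form. Horizontal operations (dot products, vector compares and
 * the cube instruction) map to dedicated r600 hardware ops and must stay
 * vectorized; the two-component compares are scalarized except for 64-bit
 * sources, which are handled later by the 64-bit-to-vec2 lowering. */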
extern "C"
bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
{
   if (instr->type != nir_instr_type_alu)
      return true;

   auto alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_bany_fnequal3:
   case nir_op_bany_fnequal4:
   case nir_op_ball_fequal3:
   case nir_op_ball_fequal4:
   case nir_op_bany_inequal3:
   case nir_op_bany_inequal4:
   case nir_op_ball_iequal3:
   case nir_op_ball_iequal4:
   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
   case nir_op_cube_r600:
      return false;
   case nir_op_bany_fnequal2:
   case nir_op_ball_fequal2:
   case nir_op_bany_inequal2:
   case nir_op_ball_iequal2:
      return nir_src_bit_size(alu->src[0].src) != 64;
   default:
      return true;
   }
}

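/* Entry point of the NIR backend: runs the lowering and optimization
 * pipeline on the selector's shader, translates the result to R600 IR with
 * ShaderFromNir and finally emits bytecode through AssemblyFromShaderLegacy.
 * Returns 0 on success and a negative value on failure. */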
int r600_shader_from_nir(struct r600_context *rctx,
                         struct r600_pipe_shader *pipeshader,
                         r600_shader_key *key)
{
   char filename[4000];
   struct r600_pipe_shader_selector *sel = pipeshader->selector;

   bool lower_64bit = ((sel->nir->options->lower_int64_options ||
                        sel->nir->options->lower_doubles_options) &&
                       (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);

   r600::ShaderFromNir convert;

   if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
      fprintf(stderr, "PRE-OPT-NIR------------------------------------------\n");
      nir_print_shader(sel->nir, stderr);
      fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
   }

   r600::sort_uniforms(sel->nir);

   /* Cayman seems very crashy about accessing images that don't exist or are
    * accessed out of range; this lowering seems to help (but it could also be
    * masking another problem). */
   if (sel->nir->info.num_images > 0 && rctx->b.chip_class == CAYMAN)
      NIR_PASS_V(sel->nir, r600_legalize_image_load_store);

   NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   nir_lower_idiv_options idiv_options = {0};
   idiv_options.imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE;
   idiv_options.allow_fp16 = true;

   NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
   NIR_PASS_V(sel->nir, r600_nir_lower_trigen);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_int64);
   while (optimize_once(sel->nir, false));

   NIR_PASS_V(sel->nir, r600_lower_shared_io);
   NIR_PASS_V(sel->nir, r600_nir_lower_atomics);

   struct nir_lower_tex_options lower_tex_options = {0};
   lower_tex_options.lower_txp = ~0u;
   lower_tex_options.lower_txf_offset = true;

   NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
   NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);

   NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);

   if (sel->nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(sel->nir, nir_lower_fragcoord_wtrans);
      NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
   }

   nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;

   //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
      io_modes |= nir_var_shader_out;

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Lower IO to temporaries late, because otherwise we get into trouble
       * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
       * somewhere that results in the input always reading from the same temp
       * regardless of interpolation when the lowering is done early. */
      NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
                 true, true);

      /* Since we're doing nir_lower_io_to_temporaries late, we need
       * to lower all the copy_deref's introduced by
       * lower_io_to_temporaries before calling nir_lower_io.
       */
      NIR_PASS_V(sel->nir, nir_split_var_copies);
      NIR_PASS_V(sel->nir, nir_lower_var_copies);
      NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
   }

   NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
              nir_lower_io_lower_64bit_to_32);

   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);

   if (lower_64bit)
      NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);

   NIR_PASS_V(sel->nir, nir_opt_constant_folding);
   NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);

   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   if (lower_64bit)
      NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false);
   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
   NIR_PASS_V(sel->nir, nir_copy_prop);
   NIR_PASS_V(sel->nir, nir_opt_dce);

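   /* Work on a clone from here on: the remaining passes depend on the shader
    * key (tessellation primitive mode etc.), which presumably keeps sel->nir
    * reusable for variants compiled with a different key. */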
   auto sh = nir_shader_clone(sel->nir, sel->nir);

   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
       sh->info.stage == MESA_SHADER_TESS_EVAL ||
       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL ?
                          sh->info.tess.primitive_mode : key->tcs.prim_mode;
      NIR_PASS_V(sh, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
   }

   if (sh->info.stage == MESA_SHADER_TESS_CTRL)
      NIR_PASS_V(sh, r600_append_tcs_TF_emission,
                 (pipe_prim_type)key->tcs.prim_mode);

   if (sh->info.stage == MESA_SHADER_TESS_EVAL)
      NIR_PASS_V(sh, r600_lower_tess_coord,
                 static_cast<pipe_prim_type>(sh->info.tess.primitive_mode));

   NIR_PASS_V(sh, nir_lower_ubo_vec4);
   if (lower_64bit)
      NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);

   /* Lower to scalar to let some optimizations work out better */
   while (optimize_once(sh, false));

   NIR_PASS_V(sh, r600::r600_merge_vec2_stores);

   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
   NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);

   NIR_PASS_V(sh, nir_lower_vars_to_scratch,
              nir_var_function_temp,
              40,
              r600_get_natural_size_align_bytes);

   while (optimize_once(sh, true));

   NIR_PASS_V(sh, nir_lower_bool_to_int32);
   NIR_PASS_V(sh, r600_nir_lower_int_tg4);
   NIR_PASS_V(sh, nir_opt_algebraic_late);

   if (sh->info.stage == MESA_SHADER_FRAGMENT)
      r600::sort_fsoutput(sh);

   NIR_PASS_V(sh, nir_lower_locals_to_regs);

   //NIR_PASS_V(sh, nir_opt_algebraic);
   //NIR_PASS_V(sh, nir_copy_prop);
   NIR_PASS_V(sh, nir_lower_to_source_mods,
              (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
                                               nir_lower_64bit_source_mods));
   NIR_PASS_V(sh, nir_convert_from_ssa, true);
   NIR_PASS_V(sh, nir_opt_dce);

   if ((rctx->screen->b.debug_flags & DBG_NIR_PREFERRED) &&
       (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
      fprintf(stderr, "-- NIR --------------------------------------------------------\n");
      struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions);
      nir_index_ssa_defs(func->impl);
      nir_print_shader(sh, stderr);
      fprintf(stderr, "-- END --------------------------------------------------------\n");
   }

   memset(&pipeshader->shader, 0, sizeof(r600_shader));
   pipeshader->scratch_space_needed = sh->scratch_size;

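   /* The cull distances are packed after the clip distances in one combined
    * mask. Example: with 4 clip and 2 cull distances, clip_dist_write is 0xf,
    * cull_dist_write is 0x30 and cc_dist_mask covers both as 0x3f. */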
   if (sh->info.stage == MESA_SHADER_TESS_EVAL ||
       sh->info.stage == MESA_SHADER_VERTEX ||
       sh->info.stage == MESA_SHADER_GEOMETRY) {
      pipeshader->shader.clip_dist_write |= ((1 << sh->info.clip_distance_array_size) - 1);
      pipeshader->shader.cull_dist_write = ((1 << sh->info.cull_distance_array_size) - 1)
                                           << sh->info.clip_distance_array_size;
      pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size +
                                               sh->info.clip_distance_array_size)) - 1;
   }

   struct r600_shader* gs_shader = nullptr;
   if (rctx->gs_shader)
      gs_shader = &rctx->gs_shader->current->shader;
   r600_screen *rscreen = rctx->screen;

   bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
   if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
      static int shnr = 0;

      snprintf(filename, sizeof(filename), "nir-%s_%d.inc", sh->info.name, shnr++);

      if (access(filename, F_OK) == -1) {
         FILE *f = fopen(filename, "w");

         if (f) {
            fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
            nir_print_shader(sh, f);
            fprintf(f, ")\"};\n");
            fclose(f);
         }
      }
      if (!r)
         return -2;
   }

   auto shader = convert.shader();

   r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
                      rscreen->has_compressed_msaa_texturing);

   r600::sfn_log << r600::SfnLog::shader_info
                 << "pipeshader->shader.processor_type = "
                 << pipeshader->shader.processor_type << "\n";

   pipeshader->shader.bc.type = pipeshader->shader.processor_type;
   pipeshader->shader.bc.isa = rctx->isa;

   r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
   if (!afs.lower(shader.m_ir)) {
      R600_ERR("%s: Lowering to assembly failed\n", __func__);
      return -1;
   }

   if (sh->info.stage == MESA_SHADER_GEOMETRY) {
      r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
      generate_gs_copy_shader(rctx, pipeshader, &sel->so);
      assert(pipeshader->gs_copy_shader);
   } else {
      r600::sfn_log << r600::SfnLog::shader_info << "This is not a geometry shader\n";
   }
   if (pipeshader->shader.bc.ngpr < 6)
      pipeshader->shader.bc.ngpr = 6;

   return 0;
}