/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;


ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base),
   m_image_count(0),
   last_emitted_alu(nullptr)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

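/* Pre-scan pass: walk each NIR instruction once before emission to collect
 * shader-wide flags (buffer texture use, TXQ on cube arrays, memory writes,
 * RAT return address requirements), then hand the instruction to the
 * shader-type specific system value scan. */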
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_ssbo_atomic_xor:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_ssbo_atomic_exchange:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
      case nir_intrinsic_ssbo_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         FALLTHROUGH;
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
         break;
      }
      default:
         ;
      }
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

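/* Reserve the registers required by system values, then preload the RAT
 * return address and, if the shader uses atomics, the atomic limits,
 * before any user instruction is emitted. */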
bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   m_ssbo_instr.load_rat_return_address();
   if (sh_info().uses_atomics)
      m_ssbo_instr.load_atomic_inc_limits();
   m_ssbo_instr.set_ssbo_offset(m_image_count);
   return retval;
}

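/* Apply a register renaming map to the GPR indices recorded in the
 * r600_shader info (arrays, inputs, outputs) so the driver-side
 * bookkeeping matches the renumbered IR; registers that survive the
 * remapping are flagged as used so a later pass won't reassign them. */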
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.num_arrays; ++i) {
      auto new_index = map[sh_info.arrays[i].gpr_start];
      if (new_index.valid)
         sh_info.arrays[i].gpr_start = new_index.new_reg;
      map[sh_info.arrays[i].gpr_start].used = true;
   }

   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

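/* Register renumbering runs in two rounds: first compact registers based
 * on their live ranges, then squeeze out the holes left by unused
 * registers. Input registers are pinned (marked used) between the rounds
 * so the second remapping cannot move them. */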
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

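/* Record the uniform's type and, for atomic counters, allocate the
 * hardware atomic ranges; images and SSBOs only set the corresponding
 * usage flags here. */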
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      if (m_atomic_base_map.find(uniform->data.binding) ==
          m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   auto type = uniform->type->is_array() ? uniform->type->without_array() : uniform->type;
   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
      if (uniform->type->is_array() && !(uniform->data.mode == nir_var_mem_ssbo))
         sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   return true;
}

void ShaderFromNirProcessor::set_shader_info(const nir_shader *sh)
{
   m_image_count = sh->info.num_images;
   do_set_shader_info(sh);
}

void ShaderFromNirProcessor::do_set_shader_info(const nir_shader *sh)
{
   (void)sh;
}

bool ShaderFromNirProcessor::scan_inputs_read(UNUSED const nir_shader *sh)
{
   return true;
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   assert(util_bitcount(instr->modes) == 1);
   m_var_mode[instr->var] = instr->modes;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

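/* Compute the SPI semantic index for an IO slot: system values get 0,
 * generic parameters use sid + 1, and everything else packs the semantic
 * name and sid into the low byte. */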
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return  nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

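/* ALU instructions need extra care: if the new instruction reads a kcache
 * constant with a relative address, the currently open ALU group has to be
 * closed first, so force the 'last' flag on the previously emitted ALU
 * instruction. */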
void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
{
   if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) {
      for (unsigned i = 0; i < ir->n_sources(); ++i) {
         auto& s = ir->src(i);
         if (s.type() == Value::kconst) {
            auto& c = static_cast<UniformValue&>(s);
            if (c.addr()) {
               last_emitted_alu->set_flag(alu_last_instr);
               break;
            }
         }
      }
   }
   last_emitted_alu = ir;
   emit_instruction_internal(ir);
}

void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   emit_instruction_internal(ir);
   last_emitted_alu = nullptr;
}

void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
   m_ssbo_instr.set_ssbo_offset(m_image_count);
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

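/* Open a conditional branch: emit a predicate compare (condition != 0)
 * that also updates the execution mask and pushes state before the ALU
 * clause, then remember the IF so the matching ELSE/ENDIF can refer back
 * to it. */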
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }

   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

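/* Map NIR shared-memory atomics to the corresponding LDS opcodes; all of
 * them use the _RET variants because the result is read back. */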
static unsigned
lds_op_from_intrinsic(nir_intrinsic_op op) {
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return LDS_OP2_LDS_ADD_RET;
   case nir_intrinsic_shared_atomic_and:
      return LDS_OP2_LDS_AND_RET;
   case nir_intrinsic_shared_atomic_or:
      return LDS_OP2_LDS_OR_RET;
   case nir_intrinsic_shared_atomic_imax:
      return LDS_OP2_LDS_MAX_INT_RET;
   case nir_intrinsic_shared_atomic_umax:
      return LDS_OP2_LDS_MAX_UINT_RET;
   case nir_intrinsic_shared_atomic_imin:
      return LDS_OP2_LDS_MIN_INT_RET;
   case nir_intrinsic_shared_atomic_umin:
      return LDS_OP2_LDS_MIN_UINT_RET;
   case nir_intrinsic_shared_atomic_xor:
      return LDS_OP2_LDS_XOR_RET;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_OP2_LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_OP3_LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}

bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   auto address = from_nir(instr->src[0], 0);
   auto dest_value = from_nir(instr->dest, 0);
   auto value = from_nir(instr->src[1], 0);
   auto op = lds_op_from_intrinsic(instr->intrinsic);

   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
      auto value2 = from_nir(instr->src[2], 0);
      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
   } else {
      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
   }
   return true;
}

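/* LDS stores are emitted for one or two channels at a time: the write mask
 * is folded so that the instruction covers either the xy or the zw pair of
 * the source. */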
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_load_uniform:
      return load_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_group_memory_barrier:
      return emit_barrier(instr);
   case nir_intrinsic_memory_barrier_atomic_counter:
      return true;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(instr);
   case nir_intrinsic_shader_clock:
      return emit_shader_clock(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
         swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr)
{
   emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0),
                                       PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write));
   emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1),
                                       PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write));
   return true;
}

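/* Build a four-component vector for a fetch source. If all requested
 * components already live in one GPR (with the expected swizzle when
 * 'match' is set), that GPR is reused and the free channels are filled
 * with unused swizzles; otherwise the components are copied into a fresh
 * temporary vector. */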
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   std::array<bool,4> used_swizzles = {false, false, false, false};

   /* Check whether all sources come from a GPR, and,
    * if requested, whether they are swizzled as expected */

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            use_same &= (v[i]->type() == Value::gpr);
            if (match) {
               use_same &= (v[i]->chan() == swizzle[i]);
            }
            used_swizzles[v[i]->chan()] = true;
         }
      }
   }

   /* Now check whether all inputs come from the same GPR, and fill
    * empty slots in the vector with unused swizzles, bail out if
    * the sources are not from the same GPR
    */

   if (use_same) {
      int next_free_swizzle = 0;
      while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
         next_free_swizzle++;

      /* Find the first GPR index used */
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);
      unsigned sel = v[i]->sel();

      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i]) {
            if (swizzle[i] >= 4)
               v[i] = PValue(new GPRValue(sel, swizzle[i]));
            else {
               assert(next_free_swizzle < 4);
               v[i] = PValue(new GPRValue(sel, next_free_swizzle));
               used_swizzles[next_free_swizzle] = true;
               while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
                  next_free_swizzle++;
            }
         }
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   /* We can't re-use the source data because they either need re-swizzling,
    * or they didn't all come from a GPR or the same GPR, so copy to a new
    * vector
    */
   if (!use_same) {
      AluInstruction *ir = nullptr;
      GPRVector result = get_temp_vec4(swizzle);
      for (int i = 0; i < 4; ++i) {
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return result;
   } else
      return GPRVector(v);
}

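/* Load from a UBO: a non-constant offset becomes a vertex-cache fetch
 * (indexed by the buffer id register if that is not constant either),
 * while constant offsets are turned into plain kcache constant reads. */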
bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
{
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);

   if (!buf_offset) {
      /* TODO: if bufid is constant then this can also be solved by using the CF index
       * on the ALU block, and this would probably make sense when there is more than
       * one load with the same buffer ID. */

      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      std::array<int, 4> swz = {7,7,7,7};
      for (unsigned i = 0; i < 4; ++i) {
         if (i < nir_dest_num_components(instr->dest)) {
            trgt.set_reg_i(i, from_nir(instr->dest, i));
            swz[i] = i + nir_intrinsic_component(instr);
         } else {
            trgt.set_reg_i(i, from_nir(instr->dest, 7));
         }
      }

      FetchInstruction *ir;
      if (bufid) {
         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                   1 + bufid->u32, nullptr, bim_none);
      } else {
         PValue bufid_val = from_nir(instr->src[0], 0, 0);
         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                   1, bufid_val, bim_zero);
      }
      ir->set_dest_swizzle(swz);
      emit_instruction(ir);
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

   if (bufid) {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstruction *ir = nullptr;
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         assert(cmp < 4);
         auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return true;

   } else {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstruction *ir = nullptr;
      auto kc_id = from_nir(instr->src[0], 0);
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect, then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */
   auto literal = nir_src_as_const_value(instr->src[0]);
   int base = nir_intrinsic_base(instr);

   if (literal) {
      AluInstruction *ir = nullptr;
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

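/* Indirectly addressed uniforms are fetched through the vertex cache;
 * the address must live in a GPR, so move it there first if needed. */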
bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   std::array<int, 4> swz = {7,7,7,7};
   for (int i = 0; i < 4; ++i) {
      trgt.set_reg_i(i, from_nir(instr->dest, i));
      swz[i] = i;
   }

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   ir->set_dest_swizzle(swz);
   emit_instruction(ir);
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

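/* Load a literal: common constants (0, 1, 1.0f, 0.5f) map to inline ALU
 * constants, everything else is emitted as a literal value slot. */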
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components; ++i) {
      if (writemask & (1 << i)) {
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                     PValue(new LiteralValue(0xffffffff, i)) :
                     Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

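/* Like from_nir, but guarantees that the result lives in a GPR, emitting a
 * MOV into a temporary when the source is a constant or literal. */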
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register(channel);
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry and
    * tessellation shaders need specialized deref_array, for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const
{
   shader.num_arrays = m_reg_arrays.size();
   if (shader.num_arrays) {
      shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array));
      for (unsigned i = 0; i < shader.num_arrays; ++i) {
         shader.arrays[i].comp_mask = m_reg_arrays[i]->mask();
         shader.arrays[i].gpr_start = m_reg_arrays[i]->sel();
         shader.arrays[i].gpr_count = m_reg_arrays[i]->size();
      }
      shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}