1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33 #include "sfn_instruction_lds.h"
34 
35 #include "../r600_shader.h"
36 #include "../eg_sq.h"
37 
38 namespace r600 {
39 
40 using std::vector;
41 
42 
43 
/* Worker class that lowers the legacy SFN IR to r600 bytecode.  It visits
 * every IR instruction type and forwards the translated result to the
 * r600 bytecode assembler (r600_bytecode_add_* functions). */
struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor {

   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);


   bool emit(const Instruction::Pointer i);
   /* Forget the value last loaded into the address register (AR) so that
    * the next indirect access reloads it. */
   void reset_addr_register() {m_last_addr.reset();}

public:
   bool visit(const AluInstruction& i) override;
   bool visit(const ExportInstruction& i) override;
   bool visit(const TexInstruction& i) override;
   bool visit(const FetchInstruction& i) override;
   bool visit(const IfInstruction& i) override;
   bool visit(const ElseInstruction& i) override;
   bool visit(const IfElseEndInstruction& i) override;
   bool visit(const LoopBeginInstruction& i) override;
   bool visit(const LoopEndInstruction& i) override;
   bool visit(const LoopBreakInstruction& i) override;
   bool visit(const LoopContInstruction& i) override;
   bool visit(const StreamOutIntruction& i) override;
   bool visit(const MemRingOutIntruction& i) override;
   bool visit(const EmitVertex& i) override;
   bool visit(const WaitAck& i) override;
   bool visit(const WriteScratchInstruction& i) override;
   bool visit(const GDSInstr& i) override;
   bool visit(const RatInstruction& i) override;
   bool visit(const LDSWriteInstruction& i) override;
   bool visit(const LDSReadInstruction& i) override;
   bool visit(const LDSAtomicInstruction& i) override;
   bool visit(const GDSStoreTessFactor& i) override;
   bool visit(const InstructionBlock& i) override;

   /* Helpers used by the visitors above. */
   bool emit_load_addr(PValue addr);
   bool emit_fs_pixel_export(const ExportInstruction & exi);
   bool emit_vs_pos_export(const ExportInstruction & exi);
   bool emit_vs_param_export(const ExportInstruction & exi);
   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   bool copy_src(r600_bytecode_alu_src& src, const Value& s);

   EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);

   ConditionalJumpTracker m_jump_tracker;
   CallStack m_callstack;

public:
   r600_bytecode *m_bc;      /* bytecode under construction (points into m_shader->bc) */
   r600_shader *m_shader;
   r600_shader_key *m_key;
   r600_bytecode_output m_output;
   unsigned m_max_color_exports;
   bool has_pos_output;
   bool has_param_output;
   PValue m_last_addr;       /* value currently loaded into AR, if any */
   int m_loop_nesting;
   /* Distinct literal values referenced by the current ALU group (used to
    * enforce the per-group literal limit). */
   std::set<uint32_t> m_nliterals_in_group;
   /* GPRs written by not-yet-waited-on VTX resp. TEX fetches; used to
    * force a new CF clause when a later fetch reads such a register. */
   std::set<int> vtx_fetch_results;
   std::set<int> tex_fetch_results;
   bool m_last_op_was_barrier;
};
104 
105 
/* Public wrapper: owns the Impl worker that does the actual lowering. */
AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
                                                   r600_shader_key *key)
{
   impl = new AssemblyFromShaderLegacyImpl(sh, key);
}
111 
AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
{
   delete impl;
}
116 
/* Lower all IR blocks to bytecode and terminate the program in a way the
 * target chip accepts (EOP bit or CF_END, with NOP padding where needed). */
bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
{
   /* A vertex shader with inputs starts by calling the fixed-function
    * fetch shader. */
   if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
       impl->m_shader->ninput > 0)
         r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);


   std::vector<Instruction::Pointer> exports;

   for (const auto& block : ir) {
      if (!impl->visit(block))
         return false;
   }   /*
   for (const auto& i : exports) {
      if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
          return false;
   }*/


   const struct cf_op_info *last = nullptr;
   if (impl->m_bc->cf_last)
      last = r600_isa_cf(impl->m_bc->cf_last->op);

   /* alu clause instructions don't have EOP bit, so add NOP */
   if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
       || impl->m_bc->cf_last->op == CF_OP_POP)
      r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);

    /* A fetch shader only can't be EOP (results in hang), but we can replace it
     * by a NOP */
   else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
      impl->m_bc->cf_last->op = CF_OP_NOP;

   /* Cayman terminates the program with an explicit CF_END instruction
    * instead of the end_of_program bit. */
   if (impl->m_shader->bc.chip_class != CAYMAN)
      impl->m_bc->cf_last->end_of_program = 1;
   else
      cm_bytecode_add_cf_end(impl->m_bc);

   return true;
}
157 
visit(const InstructionBlock & block)158 bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block)
159 {
160    for (const auto& i : block) {
161 
162       if (i->type() != Instruction::vtx) {
163           vtx_fetch_results.clear();
164           if (i->type() != Instruction::tex)
165               tex_fetch_results.clear();
166       }
167 
168       m_last_op_was_barrier &= i->type() == Instruction::alu;
169 
170       sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
171 
172       if (!i->accept(*this))
173          return false;
174 
175       if (i->type() != Instruction::alu)
176          reset_addr_register();
177    }
178 
179    return true;
180 }
181 
AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
                                                           r600_shader_key *key):
   m_callstack(sh->bc),
   m_bc(&sh->bc),
   m_shader(sh),
   m_key(key),
   has_pos_output(false),
   has_param_output(false),
   m_loop_nesting(0),
   m_last_op_was_barrier(false)
{
   /* Even with zero bound color buffers at least one export slot must be
    * available. */
   m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);

}
196 
197 extern const std::map<EAluOp, int> opcode_map;
198 
emit_load_addr(PValue addr)199 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
200 {
201    m_bc->ar_reg = addr->sel();
202    m_bc->ar_chan = addr->chan();
203    m_bc->ar_loaded = 0;
204    m_last_addr = addr;
205 
206    sfn_log << SfnLog::assembly << "   Prepare " << *addr << " to address register\n";
207 
208    return true;
209 }
210 
/* Translate one ALU instruction, handling literal-group limits, indirect
 * addressing via AR, and the CF clause type the instruction belongs to. */
bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
{

   struct r600_bytecode_alu alu;
   memset(&alu, 0, sizeof(alu));
   PValue addr_in_use;

   if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
      std::cerr << "Opcode not handled for " << ai <<"\n";
      return false;
   }

   /* Collapse consecutive group barriers into one. */
   if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
      return true;

   m_last_op_was_barrier = ai.opcode() == op0_group_barrier;

   /* Collect the literals this instruction needs; values the hardware can
    * encode inline (0, 1, 1.0f, 0.5f, -1) don't occupy a literal slot. */
   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);
      if (s.type() == Value::literal) {
         auto& v = static_cast<const LiteralValue&>(s);
         if (v.value() != 0 &&
             v.value() != 1 &&
             v.value_float() != 1.0f &&
             v.value_float() != 0.5f &&
             v.value() != 0xffffffff)
            m_nliterals_in_group.insert(v.value());
      }
   }

   /* This instruction group would exceed the limit of literals, so
    * force a new instruction group by adding a NOP as last
    * instruction. This will no longer be needed with a real
    * scheduler */
   if (m_nliterals_in_group.size() > 4) {
      sfn_log << SfnLog::assembly << "  Have " << m_nliterals_in_group.size() << " inject a last op (nop)\n";
      alu.op = ALU_OP0_NOP;
      alu.last = 1;
      alu.dst.chan = 3;
      int retval = r600_bytecode_add_alu(m_bc, &alu);
      if (retval)
         return false;
      memset(&alu, 0, sizeof(alu));
      /* Re-seed the literal tracking with this instruction's literals,
       * since they now belong to the freshly started group. */
      m_nliterals_in_group.clear();
      for (unsigned i = 0; i < ai.n_sources(); ++i) {
         auto& s = ai.src(i);
         if (s.type() == Value::literal) {
            auto& v = static_cast<const LiteralValue&>(s);
            m_nliterals_in_group.insert(v.value());
         }
      }
   }

   alu.op = opcode_map.at(ai.opcode());

   /* Missing test whether ai actually has a dest */
   auto dst = ai.dest();

   if (dst) {
      if (!copy_dst(alu.dst, *dst))
         return false;

      alu.dst.write = ai.flag(alu_write);
      alu.dst.clamp = ai.flag(alu_dst_clamp);

      /* Indirect destination: make sure AR holds the right index value. */
      if (dst->type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(*dst);
         PValue addr = v.indirect();
         if (addr) {
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.dst.rel = addr ? 1 : 0;;
         }
      }
   }

   alu.is_op3 = ai.n_sources() == 3;

   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);

      if (!copy_src(alu.src[i], s))
         return false;
      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);

      /* Indirect source: only one AR value can be live per instruction. */
      if (s.type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(s);
         PValue addr = v.indirect();
         if (addr) {
            assert(!addr_in_use || (*addr_in_use == *addr));
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.src[i].rel = addr ? 1 : 0;
         }
      }
      /* Three-source ops have no encoding space for source modifiers. */
      if (!alu.is_op3)
         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
   }

   if (ai.bank_swizzle() != alu_vec_unknown)
      alu.bank_swizzle_force = ai.bank_swizzle();

   alu.last = ai.flag(alu_last_instr);
   alu.update_pred = ai.flag(alu_update_pred);
   alu.execute_mask = ai.flag(alu_update_exec);

   /* If the destination register is equal to the last loaded address register
    * then clear the latter one, because the values will no longer be identical */
   if (m_last_addr)
      sfn_log << SfnLog::assembly << "  Current address register is " << *m_last_addr << "\n";

   if (dst)
      sfn_log << SfnLog::assembly << "  Current dst register is " << *dst << "\n";

   if (dst && m_last_addr)
      if (*dst == *m_last_addr) {
         sfn_log << SfnLog::assembly << "  Clear address register (was " << *m_last_addr << "\n";
         m_last_addr.reset();
      }

   auto cf_op = ai.cf_type();

   /* Select the CF clause type this ALU instruction must be placed in. */
   unsigned type = 0;
   switch (cf_op) {
   case cf_alu: type = CF_OP_ALU; break;
   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
   default:
      assert(0 && "cf_alu_undefined should have been replaced");
   }

   /* The last instruction of a group also ends that group's literal scope. */
   if (alu.last)
      m_nliterals_in_group.clear();

   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);

   /* MOVA invalidates the loaded AR value; SET_CF_IDX* load the CF index
    * registers. */
   if (ai.opcode() == op1_mova_int)
      m_bc->ar_loaded = 0;

   if (ai.opcode() == op1_set_cf_idx0)
      m_bc->index_loaded[0] = 1;

   if (ai.opcode() == op1_set_cf_idx1)
      m_bc->index_loaded[1] = 1;


   /* KILL and CF-index updates must start a new CF clause. */
   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
                          ai.opcode() == op2_killne_int ||
                          ai.opcode() == op1_set_cf_idx0 ||
                          ai.opcode() == op1_set_cf_idx1);
   return retval;
}
372 
emit_vs_pos_export(const ExportInstruction & exi)373 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
374 {
375    r600_bytecode_output output;
376    memset(&output, 0, sizeof(output));
377    assert(exi.gpr().type() == Value::gpr_vector);
378    const auto& gpr = exi.gpr();
379    output.gpr = gpr.sel();
380    output.elem_size = 3;
381    output.swizzle_x = gpr.chan_i(0);
382    output.swizzle_y = gpr.chan_i(1);
383    output.swizzle_z = gpr.chan_i(2);
384    output.swizzle_w = gpr.chan_i(3);
385    output.burst_count = 1;
386    output.array_base = 60 + exi.location();
387    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
388    output.type = exi.export_type();
389 
390 
391    if (r600_bytecode_add_output(m_bc, &output)) {
392       R600_ERR("Error adding pixel export at location %d\n", exi.location());
393       return false;
394    }
395 
396    return true;
397 }
398 
399 
emit_vs_param_export(const ExportInstruction & exi)400 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
401 {
402    r600_bytecode_output output;
403    assert(exi.gpr().type() == Value::gpr_vector);
404    const auto& gpr = exi.gpr();
405 
406    memset(&output, 0, sizeof(output));
407    output.gpr = gpr.sel();
408    output.elem_size = 3;
409    output.swizzle_x = gpr.chan_i(0);
410    output.swizzle_y = gpr.chan_i(1);
411    output.swizzle_z = gpr.chan_i(2);
412    output.swizzle_w = gpr.chan_i(3);
413    output.burst_count = 1;
414    output.array_base = exi.location();
415    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
416    output.type = exi.export_type();
417 
418 
419    if (r600_bytecode_add_output(m_bc, &output)) {
420       R600_ERR("Error adding pixel export at location %d\n", exi.location());
421       return false;
422    }
423 
424    return true;
425 }
426 
427 
emit_fs_pixel_export(const ExportInstruction & exi)428 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
429 {
430    if (exi.location() >= m_max_color_exports && exi.location()  < 60) {
431       R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
432                exi.location(), m_max_color_exports);
433       return true;
434    }
435 
436    assert(exi.gpr().type() == Value::gpr_vector);
437    const auto& gpr = exi.gpr();
438 
439    r600_bytecode_output output;
440    memset(&output, 0, sizeof(output));
441 
442    output.gpr = gpr.sel();
443    output.elem_size = 3;
444    output.swizzle_x = gpr.chan_i(0);
445    output.swizzle_y = gpr.chan_i(1);
446    output.swizzle_z = gpr.chan_i(2);
447    output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
448    output.burst_count = 1;
449    output.array_base = exi.location();
450    output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
451    output.type = exi.export_type();
452 
453 
454    if (r600_bytecode_add_output(m_bc, &output)) {
455       R600_ERR("Error adding pixel export at location %d\n", exi.location());
456       return false;
457    }
458 
459    return true;
460 }
461 
462 
visit(const ExportInstruction & exi)463 bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi)
464 {
465    switch (exi.export_type()) {
466    case ExportInstruction::et_pixel:
467       return emit_fs_pixel_export(exi);
468    case ExportInstruction::et_pos:
469       return emit_vs_pos_export(exi);
470    case ExportInstruction::et_param:
471       return emit_vs_param_export(exi);
472    default:
473       R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
474       return false;
475    }
476 }
477 
/* Emit an 'if': evaluate the predicate (which pushes the execution mask)
 * followed by a JUMP that is patched later by the jump tracker.  Some
 * chips need an explicit PUSH instead of ALU_PUSH_BEFORE. */
bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr)
{
   int elems = m_callstack.push(FC_PUSH_VPM);
   bool needs_workaround = false;

   /* Cayman with nested loops always needs the workaround. */
   if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
      needs_workaround = true;

   /* On the other chips the workaround is needed when the push would land
    * on (or right after) a stack sub-entry boundary. */
   if (m_bc->family != CHIP_HEMLOCK &&
       m_bc->family != CHIP_CYPRESS &&
       m_bc->family != CHIP_JUNIPER) {
      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
      unsigned dmod2 = (elems) % m_bc->stack.entry_size;

      if (elems && (!dmod1 || !dmod2))
         needs_workaround = true;
   }

   auto& pred = if_instr.pred();

   if (needs_workaround) {
      /* Emit a separate PUSH and downgrade the predicate's clause to a
       * plain ALU clause so it doesn't push a second time. */
      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
      auto new_pred = pred;
      new_pred.set_cf_type(cf_alu);
      visit(new_pred);
   } else
      visit(pred);

   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);

   /* The JUMP target is resolved when the matching else/endif is seen. */
   m_jump_tracker.push(m_bc->cf_last, jt_if);
   return true;
}
512 
visit(UNUSED const ElseInstruction & else_instr)513 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr)
514 {
515    r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
516    m_bc->cf_last->pop_count = 1;
517    return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
518 }
519 
/* Close an if/else: pop the execution mask, preferably by folding the POP
 * into the preceding ALU clause (ALU_POP_AFTER/ALU_POP2_AFTER) instead of
 * emitting a standalone POP instruction. */
bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr)
{
   m_callstack.pop(FC_PUSH_VPM);

   unsigned force_pop = m_bc->force_add_cf;
   if (!force_pop) {
      /* Determine how many pops the last clause already performs:
       * 0 for a plain ALU clause, 1 for ALU_POP_AFTER, otherwise give up
       * and emit an explicit POP. */
      int alu_pop = 3;
      if (m_bc->cf_last) {
         if (m_bc->cf_last->op == CF_OP_ALU)
            alu_pop = 0;
         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
            alu_pop = 1;
      }
      alu_pop += 1;
      if (alu_pop == 1) {
         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
         m_bc->force_add_cf = 1;
      } else if (alu_pop == 2) {
         m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
         m_bc->force_add_cf = 1;
      } else {
         force_pop = 1;
      }
   }

   if (force_pop) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
      m_bc->cf_last->pop_count = 1;
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
   }

   return m_jump_tracker.pop(m_bc->cf_last, jt_if);
}
553 
visit(UNUSED const LoopBeginInstruction & instr)554 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr)
555 {
556    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
557    m_jump_tracker.push(m_bc->cf_last, jt_loop);
558    m_callstack.push(FC_LOOP);
559    ++m_loop_nesting;
560    return true;
561 }
562 
visit(UNUSED const LoopEndInstruction & instr)563 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr)
564 {
565    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
566    m_callstack.pop(FC_LOOP);
567    assert(m_loop_nesting);
568    --m_loop_nesting;
569    return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
570 }
571 
visit(UNUSED const LoopBreakInstruction & instr)572 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr)
573 {
574    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
575    return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
576 }
577 
visit(UNUSED const LoopContInstruction & instr)578 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr)
579 {
580    r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
581    return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
582 }
583 
visit(const StreamOutIntruction & so_instr)584 bool AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr)
585 {
586    struct r600_bytecode_output output;
587    memset(&output, 0, sizeof(struct r600_bytecode_output));
588 
589    output.gpr = so_instr.gpr().sel();
590    output.elem_size = so_instr.element_size();
591    output.array_base = so_instr.array_base();
592    output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
593    output.burst_count = so_instr.burst_count();
594    output.array_size = so_instr.array_size();
595    output.comp_mask = so_instr.comp_mask();
596    output.op = so_instr.op();
597 
598    assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
599 
600 
601    if (r600_bytecode_add_output(m_bc, &output))  {
602       R600_ERR("shader_from_nir: Error creating stream output instruction\n");
603       return false;
604    }
605    return true;
606 }
607 
608 
visit(const MemRingOutIntruction & instr)609 bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr)
610 {
611    struct r600_bytecode_output output;
612    memset(&output, 0, sizeof(struct r600_bytecode_output));
613 
614    output.gpr = instr.gpr().sel();
615    output.type = instr.type();
616    output.elem_size = 3;
617    output.comp_mask = 0xf;
618    output.burst_count = 1;
619    output.op = instr.op();
620    if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
621       output.index_gpr = instr.index_reg();
622       output.array_size = 0xfff;
623    }
624    output.array_base = instr.array_base();
625 
626    if (r600_bytecode_add_output(m_bc, &output)) {
627       R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
628       return false;
629    }
630    return true;
631 }
632 
633 
/* Translate a texture instruction, handling indirect sampler/resource
 * indexing and the TEX clause read-after-fetch hazard. */
bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr)
{
   int sampler_offset = 0;
   auto addr = tex_instr.sampler_offset();
   EBufferIndexMode index_mode = bim_none;

   /* A literal sampler offset is folded into the ids; a dynamic one must
    * be loaded into a CF index register. */
   if (addr) {
      if (addr->type() == Value::literal) {
         const auto& boffs = static_cast<const LiteralValue&>(*addr);
         sampler_offset = boffs.value();
      } else {
         index_mode = emit_index_reg(*addr, 1);
      }
   }

   /* If this instruction reads a GPR written by a fetch in the current
    * clause, start a new clause to enforce ordering. */
   if (tex_fetch_results.find(tex_instr.src().sel()) !=
       tex_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      tex_fetch_results.clear();
   }

   r600_bytecode_tex tex;
   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
   tex.op = tex_instr.opcode();
   tex.sampler_id = tex_instr.sampler_id() + sampler_offset;
   tex.resource_id = tex_instr.resource_id() + sampler_offset;
   tex.src_gpr = tex_instr.src().sel();
   tex.dst_gpr = tex_instr.dst().sel();
   tex.dst_sel_x = tex_instr.dest_swizzle(0);
   tex.dst_sel_y = tex_instr.dest_swizzle(1);
   tex.dst_sel_z = tex_instr.dest_swizzle(2);
   tex.dst_sel_w = tex_instr.dest_swizzle(3);
   tex.src_sel_x = tex_instr.src().chan_i(0);
   tex.src_sel_y = tex_instr.src().chan_i(1);
   tex.src_sel_z = tex_instr.src().chan_i(2);
   tex.src_sel_w = tex_instr.src().chan_i(3);
   tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
   tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
   tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
   tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
   tex.offset_x = tex_instr.get_offset(0);
   tex.offset_y = tex_instr.get_offset(1);
   tex.offset_z = tex_instr.get_offset(2);
   tex.resource_index_mode = index_mode;
   tex.sampler_index_mode = index_mode;

   /* Track the destination GPR for hazard detection, but only if at least
    * one channel is actually written (sel < 4 selects a source channel). */
   if (tex.dst_sel_x < 4 &&
       tex.dst_sel_y < 4 &&
       tex.dst_sel_z < 4 &&
       tex.dst_sel_w < 4)
      tex_fetch_results.insert(tex.dst_gpr);

   /* Gradient queries encode fine/coarse in inst_mod. */
   if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
       tex_instr.opcode() == TexInstruction::get_gradient_v)
      tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
   else
      tex.inst_mod = tex_instr.inst_mode();
   if (r600_bytecode_add_tex(m_bc, &tex)) {
      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
      return false;
   }
   return true;
}
697 
/* Translate a vertex/buffer fetch, choosing between the VTX and TC fetch
 * paths and handling the read-after-fetch clause hazards for both. */
bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr)
{
   int buffer_offset = 0;
   auto addr = fetch_instr.buffer_offset();
   auto index_mode = fetch_instr.buffer_index_mode();

   /* A literal buffer offset is folded into the buffer id; a dynamic one
    * must be loaded into a CF index register. */
   if (addr) {
      if (addr->type() == Value::literal) {
         const auto& boffs = static_cast<const LiteralValue&>(*addr);
         buffer_offset = boffs.value();
      } else {
         index_mode = emit_index_reg(*addr, 0);
      }
   }

   /* Some fetches carry preparatory instructions (e.g. address setup). */
   if (fetch_instr.has_prelude()) {
      for(auto &i : fetch_instr.prelude()) {
         if (!i->accept(*this))
            return false;
      }
   }

   /* Cayman issues all fetches through the TC. If this fetch reads a GPR
    * written by an outstanding fetch in the same clause, force a new
    * clause to guarantee ordering. */
   bool use_tc = fetch_instr.use_tc() || (m_bc->chip_class == CAYMAN);
   if (!use_tc &&
       vtx_fetch_results.find(fetch_instr.src().sel()) !=
       vtx_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      vtx_fetch_results.clear();
   }

   if (fetch_instr.use_tc() &&
       tex_fetch_results.find(fetch_instr.src().sel()) !=
       tex_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      tex_fetch_results.clear();
   }

   /* Record the destination for hazard tracking of later instructions. */
   if (use_tc)
      tex_fetch_results.insert(fetch_instr.dst().sel());
   else
      vtx_fetch_results.insert(fetch_instr.dst().sel());

   struct r600_bytecode_vtx vtx;
   memset(&vtx, 0, sizeof(vtx));
   vtx.op = fetch_instr.vc_opcode();
   vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
   vtx.fetch_type = fetch_instr.fetch_type();
   vtx.src_gpr = fetch_instr.src().sel();
   vtx.src_sel_x = fetch_instr.src().chan();
   vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
   vtx.dst_gpr = fetch_instr.dst().sel();
   vtx.dst_sel_x = fetch_instr.swz(0);		/* SEL_X */
   vtx.dst_sel_y = fetch_instr.swz(1);		/* SEL_Y */
   vtx.dst_sel_z = fetch_instr.swz(2);		/* SEL_Z */
   vtx.dst_sel_w = fetch_instr.swz(3);		/* SEL_W */
   vtx.use_const_fields = fetch_instr.use_const_fields();
   vtx.data_format = fetch_instr.data_format();
   vtx.num_format_all = fetch_instr.num_format();		/* NUM_FORMAT_SCALED */
   vtx.format_comp_all = fetch_instr.is_signed();	/* FORMAT_COMP_SIGNED */
   vtx.endian = fetch_instr.endian_swap();
   vtx.buffer_index_mode = index_mode;
   vtx.offset = fetch_instr.offset();
   vtx.indexed = fetch_instr.indexed();
   vtx.uncached = fetch_instr.uncached();
   vtx.elem_size = fetch_instr.elm_size();
   vtx.array_base = fetch_instr.array_base();
   vtx.array_size = fetch_instr.array_size();
   vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();


   if (fetch_instr.use_tc()) {
      if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }

   } else {
      if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
         return false;
      }
   }

   /* Valid-pixel-mode only matters in fragment shaders. */
   m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) && fetch_instr.use_vpm();
   m_bc->cf_last->barrier = 1;

   return true;
}
786 
visit(const EmitVertex & instr)787 bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr)
788 {
789    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
790    if (!r)
791       m_bc->cf_last->count = instr.stream();
792    assert(m_bc->cf_last->count < 4);
793 
794    return r == 0;
795 }
796 
visit(const WaitAck & instr)797 bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr)
798 {
799    int r = r600_bytecode_add_cfinst(m_bc, instr.op());
800    if (!r) {
801       m_bc->cf_last->cf_addr = instr.n_ack();
802       m_bc->cf_last->barrier = 1;
803    }
804 
805    return r == 0;
806 }
807 
visit(const WriteScratchInstruction & instr)808 bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr)
809 {
810    struct r600_bytecode_output cf;
811 
812    memset(&cf, 0, sizeof(struct r600_bytecode_output));
813 
814    cf.op = CF_OP_MEM_SCRATCH;
815    cf.elem_size = 3;
816    cf.gpr = instr.gpr().sel();
817    cf.mark = 1;
818    cf.comp_mask = instr.write_mask();
819    cf.swizzle_x = 0;
820    cf.swizzle_y = 1;
821    cf.swizzle_z = 2;
822    cf.swizzle_w = 3;
823    cf.burst_count = 1;
824 
825    if (instr.indirect()) {
826       cf.type = 3;
827       cf.index_gpr = instr.address();
828 
829       /* The docu seems to be wrong here: In indirect addressing the
830        * address_base seems to be the array_size */
831       cf.array_size = instr.array_size();
832    } else {
833       cf.type = 2;
834       cf.array_base = instr.location();
835    }
836    /* This should be 0, but the address calculation is apparently wrong */
837 
838 
839    if (r600_bytecode_add_output(m_bc, &cf)){
840       R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
841       return false;
842    }
843 
844    return true;
845 }
846 
847 extern const std::map<ESDOp, int> ds_opcode_map;
848 
/* Translate a GDS (global data share) atomic/append instruction. */
bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr)
{
   struct r600_bytecode_gds gds;

   /* A literal UAV id is folded directly into gds.uav_id; a dynamic one is
    * loaded into CF index register 1 and selected via bim_one. */
   int uav_idx = -1;
   auto addr = instr.uav_id();
   if (addr->type() != Value::literal) {
      emit_index_reg(*addr, 1);
   } else {
      const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
      uav_idx = addr_reg.value();
   }

   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   gds.op = ds_opcode_map.at(instr.op());
   gds.dst_gpr = instr.dest_sel();
   gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
   gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
   gds.src_gpr = instr.src_sel();

   gds.src_sel_x = instr.src_swizzle(0);
   gds.src_sel_y = instr.src_swizzle(1);
   gds.src_sel_z = instr.src_swizzle(2);

   /* Only the x result channel is written; 7 masks a channel out. */
   gds.dst_sel_x = instr.dest_swizzle(0);
   gds.dst_sel_y = 7;
   gds.dst_sel_z = 7;
   gds.dst_sel_w = 7;
   gds.src_gpr2 = 0;
   gds.alloc_consume = 1; // Not Cayman

   int r = r600_bytecode_add_gds(m_bc, &gds);
   if (r)
      return false;
   m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
   m_bc->cf_last->barrier = 1;
   return true;
}
888 
visit(const GDSStoreTessFactor & instr)889 bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr)
890 {
891    struct r600_bytecode_gds gds;
892 
893    memset(&gds, 0, sizeof(struct r600_bytecode_gds));
894    gds.src_gpr = instr.sel();
895    gds.src_sel_x = instr.chan(0);
896    gds.src_sel_y = instr.chan(1);
897    gds.src_sel_z = 4;
898    gds.dst_sel_x = 7;
899    gds.dst_sel_y = 7;
900    gds.dst_sel_z = 7;
901    gds.dst_sel_w = 7;
902    gds.op = FETCH_OP_TF_WRITE;
903 
904    if (r600_bytecode_add_gds(m_bc, &gds) != 0)
905          return false;
906 
907    if (instr.chan(2) != 7) {
908       memset(&gds, 0, sizeof(struct r600_bytecode_gds));
909       gds.src_gpr = instr.sel();
910       gds.src_sel_x = instr.chan(2);
911       gds.src_sel_y = instr.chan(3);
912       gds.src_sel_z = 4;
913       gds.dst_sel_x = 7;
914       gds.dst_sel_y = 7;
915       gds.dst_sel_z = 7;
916       gds.dst_sel_w = 7;
917       gds.op = FETCH_OP_TF_WRITE;
918 
919       if (r600_bytecode_add_gds(m_bc, &gds))
920          return false;
921    }
922    return true;
923 }
924 
visit(const LDSWriteInstruction & instr)925 bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr)
926 {
927    r600_bytecode_alu alu;
928    memset(&alu, 0, sizeof(r600_bytecode_alu));
929 
930    alu.last = true;
931    alu.is_lds_idx_op = true;
932    copy_src(alu.src[0], instr.address());
933    copy_src(alu.src[1], instr.value0());
934 
935    if (instr.num_components() == 1) {
936       alu.op = LDS_OP2_LDS_WRITE;
937    } else {
938       alu.op = LDS_OP3_LDS_WRITE_REL;
939       alu.lds_idx = 1;
940       copy_src(alu.src[2], instr.value1());
941    }
942 
943    return r600_bytecode_add_alu(m_bc, &alu) == 0;
944 }
945 
/* Emit an LDS read. LDS reads are asynchronous: each LDS_READ_RET pushes
 * its result onto the LDS output queue, and a later MOV from
 * LDS_OQ_A_POP pops it into the destination GPR. The loop below runs in
 * two phases over 2*n iterations: the first n issue all fetches, the
 * remaining n pop the results in order. */
bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr)
{
   int r;
   unsigned nread = 0;
   unsigned nfetch = 0;
   unsigned n_values = instr.num_values();

   r600_bytecode_alu alu_fetch;
   r600_bytecode_alu alu_read;

   /* We must add a new ALU clause if the fetch and read op would be split otherwise
    * r600_asm limits at 120 slots = 240 dwords */
   if (m_bc->cf_last->ndw > 240 - 4 * n_values)
      m_bc->force_add_cf = 1;

   while (nread < n_values) {
      if (nfetch < n_values) {
         /* Phase 1: queue up a fetch for value #nfetch. */
         memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
         alu_fetch.is_lds_idx_op = true;
         alu_fetch.op = LDS_OP1_LDS_READ_RET;

         copy_src(alu_fetch.src[0], instr.address(nfetch));
         alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
         alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
         alu_fetch.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_fetch);
         m_bc->cf_last->nlds_read++;
         if (r)
            return false;
      }

      if (nfetch >= n_values) {
         /* Phase 2: all fetches issued; pop the next queued result. */
         memset(&alu_read, 0, sizeof(r600_bytecode_alu));
         copy_dst(alu_read.dst, instr.dest(nread));
         alu_read.op = ALU_OP1_MOV;
         alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
         alu_read.last = 1;
         alu_read.dst.write = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_read);
         m_bc->cf_last->nqueue_read++;
         if (r)
            return false;
         ++nread;
      }
      ++nfetch;
   }
   /* Every queued fetch must have a matching pop in the same clause. */
   assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);

   return true;
}
996 
visit(const LDSAtomicInstruction & instr)997 bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr)
998 {
999    if (m_bc->cf_last->ndw > 240 - 4)
1000       m_bc->force_add_cf = 1;
1001 
1002    r600_bytecode_alu alu_fetch;
1003    r600_bytecode_alu alu_read;
1004 
1005    memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
1006    alu_fetch.is_lds_idx_op = true;
1007    alu_fetch.op = instr.op();
1008 
1009    copy_src(alu_fetch.src[0], instr.address());
1010    copy_src(alu_fetch.src[1], instr.src0());
1011 
1012    if (instr.src1())
1013       copy_src(alu_fetch.src[2], *instr.src1());
1014    alu_fetch.last = 1;
1015    int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
1016    if (r)
1017       return false;
1018 
1019    memset(&alu_read, 0, sizeof(r600_bytecode_alu));
1020    copy_dst(alu_read.dst, instr.dest());
1021    alu_read.op = ALU_OP1_MOV;
1022    alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
1023    alu_read.last = 1;
1024    alu_read.dst.write = 1;
1025    r = r600_bytecode_add_alu(m_bc, &alu_read);
1026    if (r)
1027       return false;
1028    return true;
1029 }
1030 
/* Emit a RAT (random access target, i.e. image/buffer) CF instruction.
 * A literal rat-id offset is folded into the RAT index; a non-literal
 * offset is loaded into CF index register 1 for indexed addressing. */
bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr)
{
   struct r600_bytecode_gds gds;

   int rat_idx = instr.rat_id();
   EBufferIndexMode rat_index_mode = bim_none;
   auto addr = instr.rat_id_offset();

   if (addr) {
      if (addr->type() != Value::literal) {
         rat_index_mode = emit_index_reg(*addr, 1);
      } else {
         const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
         rat_idx += addr_reg.value();
      }
   }
   /* NOTE(review): gds is zeroed but never used below — looks like dead
    * code left over from an earlier version; confirm before removing. */
   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
   auto cf = m_bc->cf_last;
   cf->rat.id = rat_idx + m_shader->rat_base;
   cf->rat.inst = instr.rat_op();
   cf->rat.index_mode = rat_index_mode;
   /* output.type 3 requests a write acknowledgment, 1 is plain write. */
   cf->output.type = instr.need_ack() ? 3 : 1;
   cf->output.gpr = instr.data_gpr();
   cf->output.index_gpr = instr.index_gpr();
   cf->output.comp_mask = instr.comp_mask();
   cf->output.burst_count = instr.burst_count();
   /* The hardware takes no data swizzle here, so the data must already be
    * in x(yz) order; MAX marks an unused component. */
   assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
   if (cf->rat.inst != RatInstruction::STORE_TYPED) {
      assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
             instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
      assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
             instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
   }

   /* Valid-pixel mode in fragment shaders keeps helper invocations from
    * performing the RAT access. */
   cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
   cf->barrier = 1;
   cf->mark = instr.need_ack();
   cf->output.elem_size = instr.elm_size();
   return true;
}
1073 
/* Load 'addr' into CF index register 'idx' (0 or 1) and return the
 * corresponding buffer index mode, or bim_invalid on failure.
 * The load is skipped when the register already caches this value;
 * inside loops the cache can't be trusted across the back edge, so the
 * load is always re-emitted. */
EBufferIndexMode
AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
{
   assert(idx < 2);

   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
       m_bc->index_reg[idx] != addr.sel()
       ||  m_bc->index_reg_chan[idx] != addr.chan()) {
      struct r600_bytecode_alu alu;

      // Make sure MOVA is not last instr in clause
      if ((m_bc->cf_last->ndw>>1) >= 110)
         m_bc->force_add_cf = 1;

      if (m_bc->chip_class != CAYMAN) {

         /* Evergreen: MOVA_INT loads AR, then SET_CF_IDX0/1 copies AR into
          * the CF index register. */
         EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.chan = 0;
         alu.src[0].sel = addr.sel();
         alu.src[0].chan = addr.chan();
         alu.last = 1;
         sfn_log << SfnLog::assembly << "   mova_int, ";
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return bim_invalid;

         /* Deliberately no memset here: only op/src are changed, the rest
          * of the zeroed fields from the MOVA above are reused. */
         alu.op = opcode_map.at(idxop);
         alu.dst.chan = 0;
         alu.src[0].sel = 0;
         alu.src[0].chan = 0;
         alu.last = 1;
         sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return bim_invalid;
      } else {
         /* Cayman: MOVA_INT can target the CF index register directly. */
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.sel = idx == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
         alu.dst.chan = 0;
         alu.src[0].sel = addr.sel();
         alu.src[0].chan = addr.chan();
         alu.last = 1;
         sfn_log << SfnLog::assembly << "   mova_int, ";
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return bim_invalid;
      }

      /* Record the cached index register contents; AR is clobbered. */
      m_bc->ar_loaded = 0;
      m_bc->index_reg[idx] = addr.sel();
      m_bc->index_reg_chan[idx] = addr.chan();
      m_bc->index_loaded[idx] = true;
      sfn_log << SfnLog::assembly << "\n";
   }
   return idx == 0 ? bim_zero : bim_one;
}
1133 
copy_dst(r600_bytecode_alu_dst & dst,const Value & d)1134 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
1135                                             const Value& d)
1136 {
1137    assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
1138 
1139    if (d.sel() > 124) {
1140       R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
1141       return false;
1142    }
1143 
1144    dst.sel = d.sel();
1145    dst.chan = d.chan();
1146 
1147    if (m_bc->index_reg[1] == dst.sel &&
1148        m_bc->index_reg_chan[1] == dst.chan)
1149       m_bc->index_loaded[1] = false;
1150 
1151    if (m_bc->index_reg[0] == dst.sel &&
1152        m_bc->index_reg_chan[0] == dst.chan)
1153       m_bc->index_loaded[0] = false;
1154 
1155    return true;
1156 }
1157 
/* Translate a source Value into the r600 ALU src encoding.
 * Well-known literal constants are mapped to inline hardware constants;
 * other literals are emitted as literal slots. Relative kcache access
 * loads the address into CF index register 0 and forces a new CF
 * instruction so the kcache set can be re-established. */
bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
{

   if (s.type() == Value::gpr && s.sel() > 124) {
      R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
      return false;
   }

   if (s.type() == Value::lds_direct)  {
      R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
      return false;
   }

   /* kcache selectors 0-511 are reserved for GPRs/inline constants. */
   if (s.type() == Value::kconst && s.sel() < 512)  {
      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
      return false;
   }

   if (s.type() == Value::literal) {
      /* Prefer the free inline hardware constants over literal slots. */
      auto& v = static_cast<const LiteralValue&>(s);
      if (v.value() == 0) {
         src.sel = ALU_SRC_0;
         src.chan = 0;
         return true;
      }
      if (v.value() == 1) {
         src.sel = ALU_SRC_1_INT;
         src.chan = 0;
         return true;
      }
      if (v.value_float() == 1.0f) {
         src.sel = ALU_SRC_1;
         src.chan = 0;
         return true;
      }
      if (v.value_float() == 0.5f) {
         src.sel = ALU_SRC_0_5;
         src.chan = 0;
         return true;
      }
      if (v.value() == 0xffffffff) {
         src.sel = ALU_SRC_M_1_INT;
         src.chan = 0;
         return true;
      }
      /* General literal: store the bits and fall through — sel/chan below
       * come from the LiteralValue itself (literal selector encoding). */
      src.value = v.value();
   }

   src.sel = s.sel();
   src.chan = s.chan();
   if (s.type() == Value::kconst) {
      const UniformValue& cv = static_cast<const UniformValue&>(s);
      src.kc_bank = cv.kcache_bank();
      auto addr = cv.addr();
      if (addr) {
         /* Indirect uniform access: kc_rel with the address in CF index
          * register 0; a fresh CF instruction (same opcode) is started so
          * the kcache bindings are re-evaluated. */
         src.kc_rel = 1;
         emit_index_reg(*addr, 0);
         auto type = m_bc->cf_last->op;
         if (r600_bytecode_add_cf(m_bc)) {
                 return false;
         }
         m_bc->cf_last->op = type;
      }
   }

   return true;
}
1225 
/* Translation table from the IR ALU opcodes (EAluOp) to the r600 bytecode
 * ALU opcodes used by r600_asm. Commented-out entries are opcodes the
 * backend does not emit (yet). */
const std::map<EAluOp, int> opcode_map = {

   {op2_add, ALU_OP2_ADD},
   {op2_mul, ALU_OP2_MUL},
   {op2_mul_ieee, ALU_OP2_MUL_IEEE},
   {op2_max, ALU_OP2_MAX},
   {op2_min, ALU_OP2_MIN},
   {op2_max_dx10, ALU_OP2_MAX_DX10},
   {op2_min_dx10, ALU_OP2_MIN_DX10},
   {op2_sete, ALU_OP2_SETE},
   {op2_setgt, ALU_OP2_SETGT},
   {op2_setge, ALU_OP2_SETGE},
   {op2_setne, ALU_OP2_SETNE},
   {op2_sete_dx10, ALU_OP2_SETE_DX10},
   {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
   {op2_setge_dx10, ALU_OP2_SETGE_DX10},
   {op2_setne_dx10, ALU_OP2_SETNE_DX10},
   {op1_fract, ALU_OP1_FRACT},
   {op1_trunc, ALU_OP1_TRUNC},
   {op1_ceil, ALU_OP1_CEIL},
   {op1_rndne, ALU_OP1_RNDNE},
   {op1_floor, ALU_OP1_FLOOR},
   {op2_ashr_int, ALU_OP2_ASHR_INT},
   {op2_lshr_int, ALU_OP2_LSHR_INT},
   {op2_lshl_int, ALU_OP2_LSHL_INT},
   {op1_mov, ALU_OP1_MOV},
   {op0_nop, ALU_OP0_NOP},
   {op2_mul_64, ALU_OP2_MUL_64},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
   {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
   {op2_pred_sete, ALU_OP2_PRED_SETE},
   {op2_pred_setgt, ALU_OP2_PRED_SETGT},
   {op2_pred_setge, ALU_OP2_PRED_SETGE},
   {op2_pred_setne, ALU_OP2_PRED_SETNE},
   //{op2_pred_set_inv, ALU_OP2_PRED_SET},
   //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
   //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
   {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
   {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
   {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
   {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
   {op2_kille, ALU_OP2_KILLE},
   {op2_killgt, ALU_OP2_KILLGT},
   {op2_killge, ALU_OP2_KILLGE},
   {op2_killne, ALU_OP2_KILLNE},
   {op2_and_int, ALU_OP2_AND_INT},
   {op2_or_int, ALU_OP2_OR_INT},
   {op2_xor_int, ALU_OP2_XOR_INT},
   {op1_not_int, ALU_OP1_NOT_INT},
   {op2_add_int, ALU_OP2_ADD_INT},
   {op2_sub_int, ALU_OP2_SUB_INT},
   {op2_max_int, ALU_OP2_MAX_INT},
   {op2_min_int, ALU_OP2_MIN_INT},
   {op2_max_uint, ALU_OP2_MAX_UINT},
   {op2_min_uint, ALU_OP2_MIN_UINT},
   {op2_sete_int, ALU_OP2_SETE_INT},
   {op2_setgt_int, ALU_OP2_SETGT_INT},
   {op2_setge_int, ALU_OP2_SETGE_INT},
   {op2_setne_int, ALU_OP2_SETNE_INT},
   {op2_setgt_uint, ALU_OP2_SETGT_UINT},
   {op2_setge_uint, ALU_OP2_SETGE_UINT},
   {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
   {op2_killge_uint, ALU_OP2_KILLGE_UINT},
   //p2_prede_int, ALU_OP2_PREDE_INT},
   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
   {op2_kille_int, ALU_OP2_KILLE_INT},
   {op2_killgt_int, ALU_OP2_KILLGT_INT},
   {op2_killge_int, ALU_OP2_KILLGE_INT},
   {op2_killne_int, ALU_OP2_KILLNE_INT},
   {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
   {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
   {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
   {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
   {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
   {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
   {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
   {op1_bfrev_int, ALU_OP1_BFREV_INT},
   {op2_addc_uint, ALU_OP2_ADDC_UINT},
   {op2_subb_uint, ALU_OP2_SUBB_UINT},
   {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
   {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
   {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
   {op2_set_mode, ALU_OP2_SET_MODE},
   {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
   {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
   {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
   {op1_exp_ieee, ALU_OP1_EXP_IEEE},
   {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
   {op1_log_ieee, ALU_OP1_LOG_IEEE},
   {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
   {op1_recip_ff, ALU_OP1_RECIP_FF},
   {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
   {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
   {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
   {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
   {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
   {op1_sin, ALU_OP1_SIN},
   {op1_cos, ALU_OP1_COS},
   {op2_mullo_int, ALU_OP2_MULLO_INT},
   {op2_mulhi_int, ALU_OP2_MULHI_INT},
   {op2_mullo_uint, ALU_OP2_MULLO_UINT},
   {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
   {op1_recip_int, ALU_OP1_RECIP_INT},
   {op1_recip_uint, ALU_OP1_RECIP_UINT},
   /* 64-bit ops are encoded as two-source ALU ops. */
   {op1_recip_64, ALU_OP2_RECIP_64},
   {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
   {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
   {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
   {op1_sqrt_64, ALU_OP2_SQRT_64},
   {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
   {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
   {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
   {op2_bfm_int, ALU_OP2_BFM_INT},
   {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
   {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
   {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
   {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
   {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
   {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
   {op1_bcnt_int, ALU_OP1_BCNT_INT},
   {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
   {op1_ffbl_int, ALU_OP1_FFBL_INT},
   {op1_ffbh_int, ALU_OP1_FFBH_INT},
   {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
   {op2_dot_ieee, ALU_OP2_DOT_IEEE},
   {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
   {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
   {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
   {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
   {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
   {op2_mul_uint24, ALU_OP2_MUL_UINT24},
   {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
   {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
   {op2_sete_64, ALU_OP2_SETE_64},
   {op2_setne_64, ALU_OP2_SETNE_64},
   {op2_setgt_64, ALU_OP2_SETGT_64},
   {op2_setge_64, ALU_OP2_SETGE_64},
   {op2_min_64, ALU_OP2_MIN_64},
   {op2_max_64, ALU_OP2_MAX_64},
   {op2_dot4, ALU_OP2_DOT4},
   {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
   {op2_cube, ALU_OP2_CUBE},
   {op1_max4, ALU_OP1_MAX4},
   {op1_frexp_64, ALU_OP1_FREXP_64},
   {op1_ldexp_64, ALU_OP2_LDEXP_64},
   {op1_fract_64, ALU_OP1_FRACT_64},
   {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
   {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
   {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
   {op2_add_64, ALU_OP2_ADD_64},
   {op1_mova_int, ALU_OP1_MOVA_INT},
   /* NOTE(review): duplicate key — op1v_flt64_to_flt32 already appears
    * above with the same value; std::map keeps the first entry. */
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
   {op2_dot, ALU_OP2_DOT},
   //p2_mul_prev, ALU_OP2_MUL_PREV},
   //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
   //p2_add_prev, ALU_OP2_ADD_PREV},
   {op2_muladd_prev, ALU_OP2_MULADD_PREV},
   {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
   {op2_interp_xy, ALU_OP2_INTERP_XY},
   {op2_interp_zw, ALU_OP2_INTERP_ZW},
   {op2_interp_x, ALU_OP2_INTERP_X},
   {op2_interp_z, ALU_OP2_INTERP_Z},
   {op0_store_flags, ALU_OP1_STORE_FLAGS},
   {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
   {op0_lds_1a, ALU_OP2_LDS_1A},
   {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
   {op0_lds_2a, ALU_OP2_LDS_2A},
   {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
   {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
   {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
      // {op 3 all left shift 6
   {op3_bfe_uint, ALU_OP3_BFE_UINT},
   {op3_bfe_int, ALU_OP3_BFE_INT},
   {op3_bfi_int, ALU_OP3_BFI_INT},
   {op3_fma, ALU_OP3_FMA},
   {op3_cndne_64, ALU_OP3_CNDNE_64},
   {op3_fma_64, ALU_OP3_FMA_64},
   {op3_lerp_uint, ALU_OP3_LERP_UINT},
   {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
   {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
   {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
   {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
   {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
   {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
   {op3_muladd, ALU_OP3_MULADD},
   {op3_muladd_m2, ALU_OP3_MULADD_M2},
   {op3_muladd_m4, ALU_OP3_MULADD_M4},
   {op3_muladd_d2, ALU_OP3_MULADD_D2},
   {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
   {op3_cnde, ALU_OP3_CNDE},
   {op3_cndgt, ALU_OP3_CNDGT},
   {op3_cndge, ALU_OP3_CNDGE},
   {op3_cnde_int, ALU_OP3_CNDE_INT},
   {op3_cndgt_int, ALU_OP3_CNDGT_INT},
   {op3_cndge_int, ALU_OP3_CNDGE_INT},
   {op3_mul_lit, ALU_OP3_MUL_LIT},
};
1429 
/* Translation table from the IR GDS/LDS data-share opcodes (ESDOp) to the
 * r600 bytecode fetch opcodes. *_RET variants return the pre-op value via
 * the output queue. */
const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   /* Sentinel: invalid ops map to 0 rather than throwing in .at(). */
   {DS_OP_INVALID, 0},
};
1480 
1481 }
1482