1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_ir_to_assembly.h"
28 #include "sfn_conditionaljumptracker.h"
29 #include "sfn_callstack.h"
30 #include "sfn_instruction_gds.h"
31 #include "sfn_instruction_misc.h"
32 #include "sfn_instruction_fetch.h"
33 #include "sfn_instruction_lds.h"
34
35 #include "../r600_shader.h"
36 #include "../eg_sq.h"
37
38 namespace r600 {
39
40 using std::vector;
41
42
43
/* Worker that lowers the sfn IR to r600 bytecode: one visit() overload per
 * IR instruction type emits the corresponding ALU/CF/TEX/VTX/GDS/LDS
 * encoding into the r600_bytecode stream. */
struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor {

   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);


   bool emit(const Instruction::Pointer i);
   /* Forget the cached AR contents so the next indirect access reloads it. */
   void reset_addr_register() {m_last_addr.reset();}

public:
   bool visit(const AluInstruction& i) override;
   bool visit(const ExportInstruction& i) override;
   bool visit(const TexInstruction& i) override;
   bool visit(const FetchInstruction& i) override;
   bool visit(const IfInstruction& i) override;
   bool visit(const ElseInstruction& i) override;
   bool visit(const IfElseEndInstruction& i) override;
   bool visit(const LoopBeginInstruction& i) override;
   bool visit(const LoopEndInstruction& i) override;
   bool visit(const LoopBreakInstruction& i) override;
   bool visit(const LoopContInstruction& i) override;
   bool visit(const StreamOutIntruction& i) override;
   bool visit(const MemRingOutIntruction& i) override;
   bool visit(const EmitVertex& i) override;
   bool visit(const WaitAck& i) override;
   bool visit(const WriteScratchInstruction& i) override;
   bool visit(const GDSInstr& i) override;
   bool visit(const RatInstruction& i) override;
   bool visit(const LDSWriteInstruction& i) override;
   bool visit(const LDSReadInstruction& i) override;
   bool visit(const LDSAtomicInstruction& i) override;
   bool visit(const GDSStoreTessFactor& i) override;
   bool visit(const InstructionBlock& i) override;

   bool emit_load_addr(PValue addr);
   bool emit_fs_pixel_export(const ExportInstruction & exi);
   bool emit_vs_pos_export(const ExportInstruction & exi);
   bool emit_vs_param_export(const ExportInstruction & exi);
   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   bool copy_src(r600_bytecode_alu_src& src, const Value& s);

   EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);

   ConditionalJumpTracker m_jump_tracker;
   CallStack m_callstack;

public:
   r600_bytecode *m_bc;                     /* bytecode stream being built (points into m_shader->bc) */
   r600_shader *m_shader;
   r600_shader_key *m_key;
   r600_bytecode_output m_output;
   unsigned m_max_color_exports;            /* pixel exports beyond this (and below 60) are dropped */
   bool has_pos_output;
   bool has_param_output;
   PValue m_last_addr;                      /* value last loaded into the AR register, if still valid */
   int m_loop_nesting;
   std::set<uint32_t> m_nliterals_in_group; /* distinct literal values used by the current ALU group */
   std::set<int> vtx_fetch_results;         /* GPRs written by VTX fetches in the current clause */
   std::set<int> tex_fetch_results;         /* GPRs written by TEX fetches in the current clause */
   bool m_last_op_was_barrier;              /* used to drop consecutive group barriers */
};
104
105
AssemblyFromShaderLegacy(struct r600_shader * sh,r600_shader_key * key)106 AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
107 r600_shader_key *key)
108 {
109 impl = new AssemblyFromShaderLegacyImpl(sh, key);
110 }
111
~AssemblyFromShaderLegacy()112 AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
113 {
114 delete impl;
115 }
116
do_lower(const std::vector<InstructionBlock> & ir)117 bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
118 {
119 if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
120 impl->m_shader->ninput > 0)
121 r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
122
123
124 std::vector<Instruction::Pointer> exports;
125
126 for (const auto& block : ir) {
127 if (!impl->visit(block))
128 return false;
129 } /*
130 for (const auto& i : exports) {
131 if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
132 return false;
133 }*/
134
135
136 const struct cf_op_info *last = nullptr;
137 if (impl->m_bc->cf_last)
138 last = r600_isa_cf(impl->m_bc->cf_last->op);
139
140 /* alu clause instructions don't have EOP bit, so add NOP */
141 if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
142 || impl->m_bc->cf_last->op == CF_OP_POP)
143 r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
144
145 /* A fetch shader only can't be EOP (results in hang), but we can replace it
146 * by a NOP */
147 else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
148 impl->m_bc->cf_last->op = CF_OP_NOP;
149
150 if (impl->m_shader->bc.chip_class != CAYMAN)
151 impl->m_bc->cf_last->end_of_program = 1;
152 else
153 cm_bytecode_add_cf_end(impl->m_bc);
154
155 return true;
156 }
157
visit(const InstructionBlock & block)158 bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block)
159 {
160 for (const auto& i : block) {
161
162 if (i->type() != Instruction::vtx) {
163 vtx_fetch_results.clear();
164 if (i->type() != Instruction::tex)
165 tex_fetch_results.clear();
166 }
167
168 m_last_op_was_barrier &= i->type() == Instruction::alu;
169
170 sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
171
172 if (!i->accept(*this))
173 return false;
174
175 if (i->type() != Instruction::alu)
176 reset_addr_register();
177 }
178
179 return true;
180 }
181
AssemblyFromShaderLegacyImpl(r600_shader * sh,r600_shader_key * key)182 AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
183 r600_shader_key *key):
184 m_callstack(sh->bc),
185 m_bc(&sh->bc),
186 m_shader(sh),
187 m_key(key),
188 has_pos_output(false),
189 has_param_output(false),
190 m_loop_nesting(0),
191 m_last_op_was_barrier(false)
192 {
193 m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
194
195 }
196
197 extern const std::map<EAluOp, int> opcode_map;
198
emit_load_addr(PValue addr)199 bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
200 {
201 m_bc->ar_reg = addr->sel();
202 m_bc->ar_chan = addr->chan();
203 m_bc->ar_loaded = 0;
204 m_last_addr = addr;
205
206 sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n";
207
208 return true;
209 }
210
visit(const AluInstruction & ai)211 bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
212 {
213
214 struct r600_bytecode_alu alu;
215 memset(&alu, 0, sizeof(alu));
216 PValue addr_in_use;
217
218 if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
219 std::cerr << "Opcode not handled for " << ai <<"\n";
220 return false;
221 }
222
223 if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
224 return true;
225
226 m_last_op_was_barrier = ai.opcode() == op0_group_barrier;
227
228 for (unsigned i = 0; i < ai.n_sources(); ++i) {
229 auto& s = ai.src(i);
230 if (s.type() == Value::literal) {
231 auto& v = static_cast<const LiteralValue&>(s);
232 if (v.value() != 0 &&
233 v.value() != 1 &&
234 v.value_float() != 1.0f &&
235 v.value_float() != 0.5f &&
236 v.value() != 0xffffffff)
237 m_nliterals_in_group.insert(v.value());
238 }
239 }
240
241 /* This instruction group would exceed the limit of literals, so
242 * force a new instruction group by adding a NOP as last
243 * instruction. This will no loner be needed with a real
244 * scheduler */
245 if (m_nliterals_in_group.size() > 4) {
246 sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group.size() << " inject a last op (nop)\n";
247 alu.op = ALU_OP0_NOP;
248 alu.last = 1;
249 alu.dst.chan = 3;
250 int retval = r600_bytecode_add_alu(m_bc, &alu);
251 if (retval)
252 return false;
253 memset(&alu, 0, sizeof(alu));
254 m_nliterals_in_group.clear();
255 for (unsigned i = 0; i < ai.n_sources(); ++i) {
256 auto& s = ai.src(i);
257 if (s.type() == Value::literal) {
258 auto& v = static_cast<const LiteralValue&>(s);
259 m_nliterals_in_group.insert(v.value());
260 }
261 }
262 }
263
264 alu.op = opcode_map.at(ai.opcode());
265
266 /* Missing test whether ai actually has a dest */
267 auto dst = ai.dest();
268
269 if (dst) {
270 if (!copy_dst(alu.dst, *dst))
271 return false;
272
273 alu.dst.write = ai.flag(alu_write);
274 alu.dst.clamp = ai.flag(alu_dst_clamp);
275
276 if (dst->type() == Value::gpr_array_value) {
277 auto& v = static_cast<const GPRArrayValue&>(*dst);
278 PValue addr = v.indirect();
279 if (addr) {
280 if (!m_last_addr || *addr != *m_last_addr) {
281 emit_load_addr(addr);
282 addr_in_use = addr;
283 }
284 alu.dst.rel = addr ? 1 : 0;;
285 }
286 }
287 }
288
289 alu.is_op3 = ai.n_sources() == 3;
290
291 for (unsigned i = 0; i < ai.n_sources(); ++i) {
292 auto& s = ai.src(i);
293
294 if (!copy_src(alu.src[i], s))
295 return false;
296 alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
297
298 if (s.type() == Value::gpr_array_value) {
299 auto& v = static_cast<const GPRArrayValue&>(s);
300 PValue addr = v.indirect();
301 if (addr) {
302 assert(!addr_in_use || (*addr_in_use == *addr));
303 if (!m_last_addr || *addr != *m_last_addr) {
304 emit_load_addr(addr);
305 addr_in_use = addr;
306 }
307 alu.src[i].rel = addr ? 1 : 0;
308 }
309 }
310 if (!alu.is_op3)
311 alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
312 }
313
314 if (ai.bank_swizzle() != alu_vec_unknown)
315 alu.bank_swizzle_force = ai.bank_swizzle();
316
317 alu.last = ai.flag(alu_last_instr);
318 alu.update_pred = ai.flag(alu_update_pred);
319 alu.execute_mask = ai.flag(alu_update_exec);
320
321 /* If the destination register is equal to the last loaded address register
322 * then clear the latter one, because the values will no longer be identical */
323 if (m_last_addr)
324 sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";
325
326 if (dst)
327 sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
328
329 if (dst && m_last_addr)
330 if (*dst == *m_last_addr) {
331 sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << "\n";
332 m_last_addr.reset();
333 }
334
335 auto cf_op = ai.cf_type();
336
337 unsigned type = 0;
338 switch (cf_op) {
339 case cf_alu: type = CF_OP_ALU; break;
340 case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
341 case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
342 case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
343 case cf_alu_break: type = CF_OP_ALU_BREAK; break;
344 case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
345 case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
346 case cf_alu_extended: type = CF_OP_ALU_EXT; break;
347 default:
348 assert(0 && "cf_alu_undefined should have been replaced");
349 }
350
351 if (alu.last)
352 m_nliterals_in_group.clear();
353
354 bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
355
356 if (ai.opcode() == op1_mova_int)
357 m_bc->ar_loaded = 0;
358
359 if (ai.opcode() == op1_set_cf_idx0)
360 m_bc->index_loaded[0] = 1;
361
362 if (ai.opcode() == op1_set_cf_idx1)
363 m_bc->index_loaded[1] = 1;
364
365
366 m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
367 ai.opcode() == op2_killne_int ||
368 ai.opcode() == op1_set_cf_idx0 ||
369 ai.opcode() == op1_set_cf_idx1);
370 return retval;
371 }
372
emit_vs_pos_export(const ExportInstruction & exi)373 bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
374 {
375 r600_bytecode_output output;
376 memset(&output, 0, sizeof(output));
377 assert(exi.gpr().type() == Value::gpr_vector);
378 const auto& gpr = exi.gpr();
379 output.gpr = gpr.sel();
380 output.elem_size = 3;
381 output.swizzle_x = gpr.chan_i(0);
382 output.swizzle_y = gpr.chan_i(1);
383 output.swizzle_z = gpr.chan_i(2);
384 output.swizzle_w = gpr.chan_i(3);
385 output.burst_count = 1;
386 output.array_base = 60 + exi.location();
387 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
388 output.type = exi.export_type();
389
390
391 if (r600_bytecode_add_output(m_bc, &output)) {
392 R600_ERR("Error adding pixel export at location %d\n", exi.location());
393 return false;
394 }
395
396 return true;
397 }
398
399
emit_vs_param_export(const ExportInstruction & exi)400 bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
401 {
402 r600_bytecode_output output;
403 assert(exi.gpr().type() == Value::gpr_vector);
404 const auto& gpr = exi.gpr();
405
406 memset(&output, 0, sizeof(output));
407 output.gpr = gpr.sel();
408 output.elem_size = 3;
409 output.swizzle_x = gpr.chan_i(0);
410 output.swizzle_y = gpr.chan_i(1);
411 output.swizzle_z = gpr.chan_i(2);
412 output.swizzle_w = gpr.chan_i(3);
413 output.burst_count = 1;
414 output.array_base = exi.location();
415 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
416 output.type = exi.export_type();
417
418
419 if (r600_bytecode_add_output(m_bc, &output)) {
420 R600_ERR("Error adding pixel export at location %d\n", exi.location());
421 return false;
422 }
423
424 return true;
425 }
426
427
emit_fs_pixel_export(const ExportInstruction & exi)428 bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
429 {
430 if (exi.location() >= m_max_color_exports && exi.location() < 60) {
431 R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n",
432 exi.location(), m_max_color_exports);
433 return true;
434 }
435
436 assert(exi.gpr().type() == Value::gpr_vector);
437 const auto& gpr = exi.gpr();
438
439 r600_bytecode_output output;
440 memset(&output, 0, sizeof(output));
441
442 output.gpr = gpr.sel();
443 output.elem_size = 3;
444 output.swizzle_x = gpr.chan_i(0);
445 output.swizzle_y = gpr.chan_i(1);
446 output.swizzle_z = gpr.chan_i(2);
447 output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ;
448 output.burst_count = 1;
449 output.array_base = exi.location();
450 output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
451 output.type = exi.export_type();
452
453
454 if (r600_bytecode_add_output(m_bc, &output)) {
455 R600_ERR("Error adding pixel export at location %d\n", exi.location());
456 return false;
457 }
458
459 return true;
460 }
461
462
visit(const ExportInstruction & exi)463 bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi)
464 {
465 switch (exi.export_type()) {
466 case ExportInstruction::et_pixel:
467 return emit_fs_pixel_export(exi);
468 case ExportInstruction::et_pos:
469 return emit_vs_pos_export(exi);
470 case ExportInstruction::et_param:
471 return emit_vs_param_export(exi);
472 default:
473 R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
474 return false;
475 }
476 }
477
visit(const IfInstruction & if_instr)478 bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr)
479 {
480 int elems = m_callstack.push(FC_PUSH_VPM);
481 bool needs_workaround = false;
482
483 if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
484 needs_workaround = true;
485
486 if (m_bc->family != CHIP_HEMLOCK &&
487 m_bc->family != CHIP_CYPRESS &&
488 m_bc->family != CHIP_JUNIPER) {
489 unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
490 unsigned dmod2 = (elems) % m_bc->stack.entry_size;
491
492 if (elems && (!dmod1 || !dmod2))
493 needs_workaround = true;
494 }
495
496 auto& pred = if_instr.pred();
497
498 if (needs_workaround) {
499 r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
500 m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
501 auto new_pred = pred;
502 new_pred.set_cf_type(cf_alu);
503 visit(new_pred);
504 } else
505 visit(pred);
506
507 r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
508
509 m_jump_tracker.push(m_bc->cf_last, jt_if);
510 return true;
511 }
512
visit(UNUSED const ElseInstruction & else_instr)513 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr)
514 {
515 r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
516 m_bc->cf_last->pop_count = 1;
517 return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
518 }
519
visit(UNUSED const IfElseEndInstruction & endif_instr)520 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr)
521 {
522 m_callstack.pop(FC_PUSH_VPM);
523
524 unsigned force_pop = m_bc->force_add_cf;
525 if (!force_pop) {
526 int alu_pop = 3;
527 if (m_bc->cf_last) {
528 if (m_bc->cf_last->op == CF_OP_ALU)
529 alu_pop = 0;
530 else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
531 alu_pop = 1;
532 }
533 alu_pop += 1;
534 if (alu_pop == 1) {
535 m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
536 m_bc->force_add_cf = 1;
537 } else if (alu_pop == 2) {
538 m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
539 m_bc->force_add_cf = 1;
540 } else {
541 force_pop = 1;
542 }
543 }
544
545 if (force_pop) {
546 r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
547 m_bc->cf_last->pop_count = 1;
548 m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
549 }
550
551 return m_jump_tracker.pop(m_bc->cf_last, jt_if);
552 }
553
visit(UNUSED const LoopBeginInstruction & instr)554 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr)
555 {
556 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
557 m_jump_tracker.push(m_bc->cf_last, jt_loop);
558 m_callstack.push(FC_LOOP);
559 ++m_loop_nesting;
560 return true;
561 }
562
visit(UNUSED const LoopEndInstruction & instr)563 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr)
564 {
565 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
566 m_callstack.pop(FC_LOOP);
567 assert(m_loop_nesting);
568 --m_loop_nesting;
569 return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
570 }
571
visit(UNUSED const LoopBreakInstruction & instr)572 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr)
573 {
574 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
575 return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
576 }
577
visit(UNUSED const LoopContInstruction & instr)578 bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr)
579 {
580 r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
581 return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
582 }
583
visit(const StreamOutIntruction & so_instr)584 bool AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr)
585 {
586 struct r600_bytecode_output output;
587 memset(&output, 0, sizeof(struct r600_bytecode_output));
588
589 output.gpr = so_instr.gpr().sel();
590 output.elem_size = so_instr.element_size();
591 output.array_base = so_instr.array_base();
592 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
593 output.burst_count = so_instr.burst_count();
594 output.array_size = so_instr.array_size();
595 output.comp_mask = so_instr.comp_mask();
596 output.op = so_instr.op();
597
598 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
599
600
601 if (r600_bytecode_add_output(m_bc, &output)) {
602 R600_ERR("shader_from_nir: Error creating stream output instruction\n");
603 return false;
604 }
605 return true;
606 }
607
608
visit(const MemRingOutIntruction & instr)609 bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr)
610 {
611 struct r600_bytecode_output output;
612 memset(&output, 0, sizeof(struct r600_bytecode_output));
613
614 output.gpr = instr.gpr().sel();
615 output.type = instr.type();
616 output.elem_size = 3;
617 output.comp_mask = 0xf;
618 output.burst_count = 1;
619 output.op = instr.op();
620 if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
621 output.index_gpr = instr.index_reg();
622 output.array_size = 0xfff;
623 }
624 output.array_base = instr.array_base();
625
626 if (r600_bytecode_add_output(m_bc, &output)) {
627 R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
628 return false;
629 }
630 return true;
631 }
632
633
visit(const TexInstruction & tex_instr)634 bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr)
635 {
636 int sampler_offset = 0;
637 auto addr = tex_instr.sampler_offset();
638 EBufferIndexMode index_mode = bim_none;
639
640 if (addr) {
641 if (addr->type() == Value::literal) {
642 const auto& boffs = static_cast<const LiteralValue&>(*addr);
643 sampler_offset = boffs.value();
644 } else {
645 index_mode = emit_index_reg(*addr, 1);
646 }
647 }
648
649 if (tex_fetch_results.find(tex_instr.src().sel()) !=
650 tex_fetch_results.end()) {
651 m_bc->force_add_cf = 1;
652 tex_fetch_results.clear();
653 }
654
655 r600_bytecode_tex tex;
656 memset(&tex, 0, sizeof(struct r600_bytecode_tex));
657 tex.op = tex_instr.opcode();
658 tex.sampler_id = tex_instr.sampler_id() + sampler_offset;
659 tex.resource_id = tex_instr.resource_id() + sampler_offset;
660 tex.src_gpr = tex_instr.src().sel();
661 tex.dst_gpr = tex_instr.dst().sel();
662 tex.dst_sel_x = tex_instr.dest_swizzle(0);
663 tex.dst_sel_y = tex_instr.dest_swizzle(1);
664 tex.dst_sel_z = tex_instr.dest_swizzle(2);
665 tex.dst_sel_w = tex_instr.dest_swizzle(3);
666 tex.src_sel_x = tex_instr.src().chan_i(0);
667 tex.src_sel_y = tex_instr.src().chan_i(1);
668 tex.src_sel_z = tex_instr.src().chan_i(2);
669 tex.src_sel_w = tex_instr.src().chan_i(3);
670 tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
671 tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
672 tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
673 tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
674 tex.offset_x = tex_instr.get_offset(0);
675 tex.offset_y = tex_instr.get_offset(1);
676 tex.offset_z = tex_instr.get_offset(2);
677 tex.resource_index_mode = index_mode;
678 tex.sampler_index_mode = index_mode;
679
680 if (tex.dst_sel_x < 4 &&
681 tex.dst_sel_y < 4 &&
682 tex.dst_sel_z < 4 &&
683 tex.dst_sel_w < 4)
684 tex_fetch_results.insert(tex.dst_gpr);
685
686 if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
687 tex_instr.opcode() == TexInstruction::get_gradient_v)
688 tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
689 else
690 tex.inst_mod = tex_instr.inst_mode();
691 if (r600_bytecode_add_tex(m_bc, &tex)) {
692 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
693 return false;
694 }
695 return true;
696 }
697
visit(const FetchInstruction & fetch_instr)698 bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr)
699 {
700 int buffer_offset = 0;
701 auto addr = fetch_instr.buffer_offset();
702 auto index_mode = fetch_instr.buffer_index_mode();
703
704 if (addr) {
705 if (addr->type() == Value::literal) {
706 const auto& boffs = static_cast<const LiteralValue&>(*addr);
707 buffer_offset = boffs.value();
708 } else {
709 index_mode = emit_index_reg(*addr, 0);
710 }
711 }
712
713 if (fetch_instr.has_prelude()) {
714 for(auto &i : fetch_instr.prelude()) {
715 if (!i->accept(*this))
716 return false;
717 }
718 }
719
720 bool use_tc = fetch_instr.use_tc() || (m_bc->chip_class == CAYMAN);
721 if (!use_tc &&
722 vtx_fetch_results.find(fetch_instr.src().sel()) !=
723 vtx_fetch_results.end()) {
724 m_bc->force_add_cf = 1;
725 vtx_fetch_results.clear();
726 }
727
728 if (fetch_instr.use_tc() &&
729 tex_fetch_results.find(fetch_instr.src().sel()) !=
730 tex_fetch_results.end()) {
731 m_bc->force_add_cf = 1;
732 tex_fetch_results.clear();
733 }
734
735 if (use_tc)
736 tex_fetch_results.insert(fetch_instr.dst().sel());
737 else
738 vtx_fetch_results.insert(fetch_instr.dst().sel());
739
740 struct r600_bytecode_vtx vtx;
741 memset(&vtx, 0, sizeof(vtx));
742 vtx.op = fetch_instr.vc_opcode();
743 vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
744 vtx.fetch_type = fetch_instr.fetch_type();
745 vtx.src_gpr = fetch_instr.src().sel();
746 vtx.src_sel_x = fetch_instr.src().chan();
747 vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
748 vtx.dst_gpr = fetch_instr.dst().sel();
749 vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */
750 vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */
751 vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */
752 vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */
753 vtx.use_const_fields = fetch_instr.use_const_fields();
754 vtx.data_format = fetch_instr.data_format();
755 vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */
756 vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */
757 vtx.endian = fetch_instr.endian_swap();
758 vtx.buffer_index_mode = index_mode;
759 vtx.offset = fetch_instr.offset();
760 vtx.indexed = fetch_instr.indexed();
761 vtx.uncached = fetch_instr.uncached();
762 vtx.elem_size = fetch_instr.elm_size();
763 vtx.array_base = fetch_instr.array_base();
764 vtx.array_size = fetch_instr.array_size();
765 vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();
766
767
768 if (fetch_instr.use_tc()) {
769 if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
770 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
771 return false;
772 }
773
774 } else {
775 if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
776 R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
777 return false;
778 }
779 }
780
781 m_bc->cf_last->vpm = (m_bc->type == PIPE_SHADER_FRAGMENT) && fetch_instr.use_vpm();
782 m_bc->cf_last->barrier = 1;
783
784 return true;
785 }
786
visit(const EmitVertex & instr)787 bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr)
788 {
789 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
790 if (!r)
791 m_bc->cf_last->count = instr.stream();
792 assert(m_bc->cf_last->count < 4);
793
794 return r == 0;
795 }
796
visit(const WaitAck & instr)797 bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr)
798 {
799 int r = r600_bytecode_add_cfinst(m_bc, instr.op());
800 if (!r) {
801 m_bc->cf_last->cf_addr = instr.n_ack();
802 m_bc->cf_last->barrier = 1;
803 }
804
805 return r == 0;
806 }
807
visit(const WriteScratchInstruction & instr)808 bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr)
809 {
810 struct r600_bytecode_output cf;
811
812 memset(&cf, 0, sizeof(struct r600_bytecode_output));
813
814 cf.op = CF_OP_MEM_SCRATCH;
815 cf.elem_size = 3;
816 cf.gpr = instr.gpr().sel();
817 cf.mark = 1;
818 cf.comp_mask = instr.write_mask();
819 cf.swizzle_x = 0;
820 cf.swizzle_y = 1;
821 cf.swizzle_z = 2;
822 cf.swizzle_w = 3;
823 cf.burst_count = 1;
824
825 if (instr.indirect()) {
826 cf.type = 3;
827 cf.index_gpr = instr.address();
828
829 /* The docu seems to be wrong here: In indirect addressing the
830 * address_base seems to be the array_size */
831 cf.array_size = instr.array_size();
832 } else {
833 cf.type = 2;
834 cf.array_base = instr.location();
835 }
836 /* This should be 0, but the address calculation is apparently wrong */
837
838
839 if (r600_bytecode_add_output(m_bc, &cf)){
840 R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
841 return false;
842 }
843
844 return true;
845 }
846
847 extern const std::map<ESDOp, int> ds_opcode_map;
848
visit(const GDSInstr & instr)849 bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr)
850 {
851 struct r600_bytecode_gds gds;
852
853 int uav_idx = -1;
854 auto addr = instr.uav_id();
855 if (addr->type() != Value::literal) {
856 emit_index_reg(*addr, 1);
857 } else {
858 const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
859 uav_idx = addr_reg.value();
860 }
861
862 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
863
864 gds.op = ds_opcode_map.at(instr.op());
865 gds.dst_gpr = instr.dest_sel();
866 gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
867 gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
868 gds.src_gpr = instr.src_sel();
869
870 gds.src_sel_x = instr.src_swizzle(0);
871 gds.src_sel_y = instr.src_swizzle(1);
872 gds.src_sel_z = instr.src_swizzle(2);
873
874 gds.dst_sel_x = instr.dest_swizzle(0);
875 gds.dst_sel_y = 7;
876 gds.dst_sel_z = 7;
877 gds.dst_sel_w = 7;
878 gds.src_gpr2 = 0;
879 gds.alloc_consume = 1; // Not Cayman
880
881 int r = r600_bytecode_add_gds(m_bc, &gds);
882 if (r)
883 return false;
884 m_bc->cf_last->vpm = PIPE_SHADER_FRAGMENT == m_bc->type;
885 m_bc->cf_last->barrier = 1;
886 return true;
887 }
888
visit(const GDSStoreTessFactor & instr)889 bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr)
890 {
891 struct r600_bytecode_gds gds;
892
893 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
894 gds.src_gpr = instr.sel();
895 gds.src_sel_x = instr.chan(0);
896 gds.src_sel_y = instr.chan(1);
897 gds.src_sel_z = 4;
898 gds.dst_sel_x = 7;
899 gds.dst_sel_y = 7;
900 gds.dst_sel_z = 7;
901 gds.dst_sel_w = 7;
902 gds.op = FETCH_OP_TF_WRITE;
903
904 if (r600_bytecode_add_gds(m_bc, &gds) != 0)
905 return false;
906
907 if (instr.chan(2) != 7) {
908 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
909 gds.src_gpr = instr.sel();
910 gds.src_sel_x = instr.chan(2);
911 gds.src_sel_y = instr.chan(3);
912 gds.src_sel_z = 4;
913 gds.dst_sel_x = 7;
914 gds.dst_sel_y = 7;
915 gds.dst_sel_z = 7;
916 gds.dst_sel_w = 7;
917 gds.op = FETCH_OP_TF_WRITE;
918
919 if (r600_bytecode_add_gds(m_bc, &gds))
920 return false;
921 }
922 return true;
923 }
924
visit(const LDSWriteInstruction & instr)925 bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr)
926 {
927 r600_bytecode_alu alu;
928 memset(&alu, 0, sizeof(r600_bytecode_alu));
929
930 alu.last = true;
931 alu.is_lds_idx_op = true;
932 copy_src(alu.src[0], instr.address());
933 copy_src(alu.src[1], instr.value0());
934
935 if (instr.num_components() == 1) {
936 alu.op = LDS_OP2_LDS_WRITE;
937 } else {
938 alu.op = LDS_OP3_LDS_WRITE_REL;
939 alu.lds_idx = 1;
940 copy_src(alu.src[2], instr.value1());
941 }
942
943 return r600_bytecode_add_alu(m_bc, &alu) == 0;
944 }
945
visit(const LDSReadInstruction & instr)946 bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr)
947 {
948 int r;
949 unsigned nread = 0;
950 unsigned nfetch = 0;
951 unsigned n_values = instr.num_values();
952
953 r600_bytecode_alu alu_fetch;
954 r600_bytecode_alu alu_read;
955
956 /* We must add a new ALU clause if the fetch and read op would be split otherwise
957 * r600_asm limits at 120 slots = 240 dwords */
958 if (m_bc->cf_last->ndw > 240 - 4 * n_values)
959 m_bc->force_add_cf = 1;
960
961 while (nread < n_values) {
962 if (nfetch < n_values) {
963 memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
964 alu_fetch.is_lds_idx_op = true;
965 alu_fetch.op = LDS_OP1_LDS_READ_RET;
966
967 copy_src(alu_fetch.src[0], instr.address(nfetch));
968 alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
969 alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
970 alu_fetch.last = 1;
971 r = r600_bytecode_add_alu(m_bc, &alu_fetch);
972 m_bc->cf_last->nlds_read++;
973 if (r)
974 return false;
975 }
976
977 if (nfetch >= n_values) {
978 memset(&alu_read, 0, sizeof(r600_bytecode_alu));
979 copy_dst(alu_read.dst, instr.dest(nread));
980 alu_read.op = ALU_OP1_MOV;
981 alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
982 alu_read.last = 1;
983 alu_read.dst.write = 1;
984 r = r600_bytecode_add_alu(m_bc, &alu_read);
985 m_bc->cf_last->nqueue_read++;
986 if (r)
987 return false;
988 ++nread;
989 }
990 ++nfetch;
991 }
992 assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);
993
994 return true;
995 }
996
visit(const LDSAtomicInstruction & instr)997 bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr)
998 {
999 if (m_bc->cf_last->ndw > 240 - 4)
1000 m_bc->force_add_cf = 1;
1001
1002 r600_bytecode_alu alu_fetch;
1003 r600_bytecode_alu alu_read;
1004
1005 memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
1006 alu_fetch.is_lds_idx_op = true;
1007 alu_fetch.op = instr.op();
1008
1009 copy_src(alu_fetch.src[0], instr.address());
1010 copy_src(alu_fetch.src[1], instr.src0());
1011
1012 if (instr.src1())
1013 copy_src(alu_fetch.src[2], *instr.src1());
1014 alu_fetch.last = 1;
1015 int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
1016 if (r)
1017 return false;
1018
1019 memset(&alu_read, 0, sizeof(r600_bytecode_alu));
1020 copy_dst(alu_read.dst, instr.dest());
1021 alu_read.op = ALU_OP1_MOV;
1022 alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
1023 alu_read.last = 1;
1024 alu_read.dst.write = 1;
1025 r = r600_bytecode_add_alu(m_bc, &alu_read);
1026 if (r)
1027 return false;
1028 return true;
1029 }
1030
visit(const RatInstruction & instr)1031 bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr)
1032 {
1033 struct r600_bytecode_gds gds;
1034
1035 int rat_idx = instr.rat_id();
1036 EBufferIndexMode rat_index_mode = bim_none;
1037 auto addr = instr.rat_id_offset();
1038
1039 if (addr) {
1040 if (addr->type() != Value::literal) {
1041 rat_index_mode = emit_index_reg(*addr, 1);
1042 } else {
1043 const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
1044 rat_idx += addr_reg.value();
1045 }
1046 }
1047 memset(&gds, 0, sizeof(struct r600_bytecode_gds));
1048
1049 r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
1050 auto cf = m_bc->cf_last;
1051 cf->rat.id = rat_idx + m_shader->rat_base;
1052 cf->rat.inst = instr.rat_op();
1053 cf->rat.index_mode = rat_index_mode;
1054 cf->output.type = instr.need_ack() ? 3 : 1;
1055 cf->output.gpr = instr.data_gpr();
1056 cf->output.index_gpr = instr.index_gpr();
1057 cf->output.comp_mask = instr.comp_mask();
1058 cf->output.burst_count = instr.burst_count();
1059 assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
1060 if (cf->rat.inst != RatInstruction::STORE_TYPED) {
1061 assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
1062 instr.data_swz(1) == PIPE_SWIZZLE_MAX) ;
1063 assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
1064 instr.data_swz(2) == PIPE_SWIZZLE_MAX) ;
1065 }
1066
1067 cf->vpm = m_bc->type == PIPE_SHADER_FRAGMENT;
1068 cf->barrier = 1;
1069 cf->mark = instr.need_ack();
1070 cf->output.elem_size = instr.elm_size();
1071 return true;
1072 }
1073
/* Load the value addr into CF index register idx (0 or 1) and return the
 * corresponding buffer index mode (bim_zero / bim_one), or bim_invalid on
 * failure.  The loaded register/channel is cached in m_bc so a reload is
 * skipped when the index already holds this value; inside loops the value
 * is always reloaded (presumably because the cached value may change per
 * iteration — NOTE(review): confirm). */
EBufferIndexMode
AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
{
   assert(idx < 2);

   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
       m_bc->index_reg[idx] != addr.sel()
       || m_bc->index_reg_chan[idx] != addr.chan()) {
      struct r600_bytecode_alu alu;

      // Make sure MOVA is not last instr in clause
      if ((m_bc->cf_last->ndw>>1) >= 110)
         m_bc->force_add_cf = 1;

      if (m_bc->chip_class != CAYMAN) {

         /* Evergreen/NI: two steps — MOVA_INT loads the address register,
          * then SET_CF_IDX0/1 transfers it into the CF index register. */
         EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.chan = 0;
         alu.src[0].sel = addr.sel();
         alu.src[0].chan = addr.chan();
         alu.last = 1;
         sfn_log << SfnLog::assembly << " mova_int, ";
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return bim_invalid;

         /* Second step reuses the same alu struct with a new op and
          * zeroed source. */
         alu.op = opcode_map.at(idxop);
         alu.dst.chan = 0;
         alu.src[0].sel = 0;
         alu.src[0].chan = 0;
         alu.last = 1;
         sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
         r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return bim_invalid;
      } else {
         /* Cayman: a single MOVA_INT writes the CF index register directly
          * via a special destination selector. */
         memset(&alu, 0, sizeof(alu));
         alu.op = opcode_map.at(op1_mova_int);
         alu.dst.sel = idx == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
         alu.dst.chan = 0;
         alu.src[0].sel = addr.sel();
         alu.src[0].chan = addr.chan();
         alu.last = 1;
         sfn_log << SfnLog::assembly << " mova_int, ";
         int r = r600_bytecode_add_alu(m_bc, &alu);
         if (r)
            return bim_invalid;
      }

      /* MOVA clobbered the address register, and the cache now tracks the
       * newly loaded index value. */
      m_bc->ar_loaded = 0;
      m_bc->index_reg[idx] = addr.sel();
      m_bc->index_reg_chan[idx] = addr.chan();
      m_bc->index_loaded[idx] = true;
      sfn_log << SfnLog::assembly << "\n";
   }
   return idx == 0 ? bim_zero : bim_one;
}
1133
copy_dst(r600_bytecode_alu_dst & dst,const Value & d)1134 bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
1135 const Value& d)
1136 {
1137 assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
1138
1139 if (d.sel() > 124) {
1140 R600_ERR("shader_from_nir: Don't support more then 124 GPRs, but try using %d\n", d.sel());
1141 return false;
1142 }
1143
1144 dst.sel = d.sel();
1145 dst.chan = d.chan();
1146
1147 if (m_bc->index_reg[1] == dst.sel &&
1148 m_bc->index_reg_chan[1] == dst.chan)
1149 m_bc->index_loaded[1] = false;
1150
1151 if (m_bc->index_reg[0] == dst.sel &&
1152 m_bc->index_reg_chan[0] == dst.chan)
1153 m_bc->index_loaded[0] = false;
1154
1155 return true;
1156 }
1157
/* Translate an IR source value into the bytecode ALU source operand.
 * Literals that match a hardware inline constant are encoded as that
 * constant; other literals carry their raw bits in src.value.  Relative
 * kcache (uniform) access loads the index into CF index register 0,
 * which forces a new CF instruction of the same type. */
bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
{

   if (s.type() == Value::gpr && s.sel() > 124) {
      R600_ERR("shader_from_nir: Don't support more then 124 GPRs, try using %d\n", s.sel());
      return false;
   }

   if (s.type() == Value::lds_direct) {
      R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
      return false;
   }

   /* Kcache selectors live in the >= 512 range; anything lower is a
    * mis-assigned uniform. */
   if (s.type() == Value::kconst && s.sel() < 512) {
      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d \n", s.sel());
      return false;
   }

   if (s.type() == Value::literal) {
      auto& v = static_cast<const LiteralValue&>(s);
      /* Use a hardware inline constant when the literal matches one;
       * this saves a literal slot in the instruction group. */
      if (v.value() == 0) {
         src.sel = ALU_SRC_0;
         src.chan = 0;
         return true;
      }
      if (v.value() == 1) {
         src.sel = ALU_SRC_1_INT;
         src.chan = 0;
         return true;
      }
      if (v.value_float() == 1.0f) {
         src.sel = ALU_SRC_1;
         src.chan = 0;
         return true;
      }
      if (v.value_float() == 0.5f) {
         src.sel = ALU_SRC_0_5;
         src.chan = 0;
         return true;
      }
      if (v.value() == 0xffffffff) {
         src.sel = ALU_SRC_M_1_INT;
         src.chan = 0;
         return true;
      }
      /* No inline match: keep the raw bits and fall through so sel/chan
       * are taken from the literal value itself. */
      src.value = v.value();
   }

   src.sel = s.sel();
   src.chan = s.chan();
   if (s.type() == Value::kconst) {
      const UniformValue& cv = static_cast<const UniformValue&>(s);
      src.kc_bank = cv.kcache_bank();
      auto addr = cv.addr();
      if (addr) {
         /* Indirect uniform access: address goes into CF_IDX0, and the
          * current CF instruction must be restarted so the new index
          * takes effect (the op is preserved across the split). */
         src.kc_rel = 1;
         emit_index_reg(*addr, 0);
         auto type = m_bc->cf_last->op;
         if (r600_bytecode_add_cf(m_bc)) {
            return false;
         }
         m_bc->cf_last->op = type;
      }
   }

   return true;
}
1225
/* Translation table from the IR ALU opcodes (EAluOp) to the r600 bytecode
 * opcodes used by r600_asm.  NOTE(review): a few entries map across op
 * arity (e.g. op1_set_cf_idx* -> ALU_OP0_*, op1_recip_64 -> ALU_OP2_*);
 * presumably this matches the hardware encoding of those ops — confirm
 * against the ISA docs before changing. */
const std::map<EAluOp, int> opcode_map = {

   {op2_add, ALU_OP2_ADD},
   {op2_mul, ALU_OP2_MUL},
   {op2_mul_ieee, ALU_OP2_MUL_IEEE},
   {op2_max, ALU_OP2_MAX},
   {op2_min, ALU_OP2_MIN},
   {op2_max_dx10, ALU_OP2_MAX_DX10},
   {op2_min_dx10, ALU_OP2_MIN_DX10},
   {op2_sete, ALU_OP2_SETE},
   {op2_setgt, ALU_OP2_SETGT},
   {op2_setge, ALU_OP2_SETGE},
   {op2_setne, ALU_OP2_SETNE},
   {op2_sete_dx10, ALU_OP2_SETE_DX10},
   {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
   {op2_setge_dx10, ALU_OP2_SETGE_DX10},
   {op2_setne_dx10, ALU_OP2_SETNE_DX10},
   {op1_fract, ALU_OP1_FRACT},
   {op1_trunc, ALU_OP1_TRUNC},
   {op1_ceil, ALU_OP1_CEIL},
   {op1_rndne, ALU_OP1_RNDNE},
   {op1_floor, ALU_OP1_FLOOR},
   {op2_ashr_int, ALU_OP2_ASHR_INT},
   {op2_lshr_int, ALU_OP2_LSHR_INT},
   {op2_lshl_int, ALU_OP2_LSHL_INT},
   {op1_mov, ALU_OP1_MOV},
   {op0_nop, ALU_OP0_NOP},
   {op2_mul_64, ALU_OP2_MUL_64},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
   {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
   {op2_pred_sete, ALU_OP2_PRED_SETE},
   {op2_pred_setgt, ALU_OP2_PRED_SETGT},
   {op2_pred_setge, ALU_OP2_PRED_SETGE},
   {op2_pred_setne, ALU_OP2_PRED_SETNE},
   //{op2_pred_set_inv, ALU_OP2_PRED_SET},
   //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
   //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
   {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
   {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
   {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
   {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
   {op2_kille, ALU_OP2_KILLE},
   {op2_killgt, ALU_OP2_KILLGT},
   {op2_killge, ALU_OP2_KILLGE},
   {op2_killne, ALU_OP2_KILLNE},
   {op2_and_int, ALU_OP2_AND_INT},
   {op2_or_int, ALU_OP2_OR_INT},
   {op2_xor_int, ALU_OP2_XOR_INT},
   {op1_not_int, ALU_OP1_NOT_INT},
   {op2_add_int, ALU_OP2_ADD_INT},
   {op2_sub_int, ALU_OP2_SUB_INT},
   {op2_max_int, ALU_OP2_MAX_INT},
   {op2_min_int, ALU_OP2_MIN_INT},
   {op2_max_uint, ALU_OP2_MAX_UINT},
   {op2_min_uint, ALU_OP2_MIN_UINT},
   {op2_sete_int, ALU_OP2_SETE_INT},
   {op2_setgt_int, ALU_OP2_SETGT_INT},
   {op2_setge_int, ALU_OP2_SETGE_INT},
   {op2_setne_int, ALU_OP2_SETNE_INT},
   {op2_setgt_uint, ALU_OP2_SETGT_UINT},
   {op2_setge_uint, ALU_OP2_SETGE_UINT},
   {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
   {op2_killge_uint, ALU_OP2_KILLGE_UINT},
   //p2_prede_int, ALU_OP2_PREDE_INT},
   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
   {op2_kille_int, ALU_OP2_KILLE_INT},
   {op2_killgt_int, ALU_OP2_KILLGT_INT},
   {op2_killge_int, ALU_OP2_KILLGE_INT},
   {op2_killne_int, ALU_OP2_KILLNE_INT},
   {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
   {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
   {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
   {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
   {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
   {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
   {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
   {op1_bfrev_int, ALU_OP1_BFREV_INT},
   {op2_addc_uint, ALU_OP2_ADDC_UINT},
   {op2_subb_uint, ALU_OP2_SUBB_UINT},
   {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
   {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
   {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
   {op2_set_mode, ALU_OP2_SET_MODE},
   {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
   {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
   {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
   {op1_exp_ieee, ALU_OP1_EXP_IEEE},
   {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
   {op1_log_ieee, ALU_OP1_LOG_IEEE},
   {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
   {op1_recip_ff, ALU_OP1_RECIP_FF},
   {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
   {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
   {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
   {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
   {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
   {op1_sin, ALU_OP1_SIN},
   {op1_cos, ALU_OP1_COS},
   {op2_mullo_int, ALU_OP2_MULLO_INT},
   {op2_mulhi_int, ALU_OP2_MULHI_INT},
   {op2_mullo_uint, ALU_OP2_MULLO_UINT},
   {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
   {op1_recip_int, ALU_OP1_RECIP_INT},
   {op1_recip_uint, ALU_OP1_RECIP_UINT},
   {op1_recip_64, ALU_OP2_RECIP_64},
   {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
   {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
   {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
   {op1_sqrt_64, ALU_OP2_SQRT_64},
   {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
   {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
   {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
   {op2_bfm_int, ALU_OP2_BFM_INT},
   {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
   {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
   {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
   {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
   {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
   {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
   {op1_bcnt_int, ALU_OP1_BCNT_INT},
   {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
   {op1_ffbl_int, ALU_OP1_FFBL_INT},
   {op1_ffbh_int, ALU_OP1_FFBH_INT},
   {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
   {op2_dot_ieee, ALU_OP2_DOT_IEEE},
   {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
   {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
   {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
   {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
   {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
   {op2_mul_uint24, ALU_OP2_MUL_UINT24},
   {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
   {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
   {op2_sete_64, ALU_OP2_SETE_64},
   {op2_setne_64, ALU_OP2_SETNE_64},
   {op2_setgt_64, ALU_OP2_SETGT_64},
   {op2_setge_64, ALU_OP2_SETGE_64},
   {op2_min_64, ALU_OP2_MIN_64},
   {op2_max_64, ALU_OP2_MAX_64},
   {op2_dot4, ALU_OP2_DOT4},
   {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
   {op2_cube, ALU_OP2_CUBE},
   {op1_max4, ALU_OP1_MAX4},
   {op1_frexp_64, ALU_OP1_FREXP_64},
   {op1_ldexp_64, ALU_OP2_LDEXP_64},
   {op1_fract_64, ALU_OP1_FRACT_64},
   {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
   {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
   {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
   {op2_add_64, ALU_OP2_ADD_64},
   {op1_mova_int, ALU_OP1_MOVA_INT},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
   {op2_dot, ALU_OP2_DOT},
   //p2_mul_prev, ALU_OP2_MUL_PREV},
   //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
   //p2_add_prev, ALU_OP2_ADD_PREV},
   {op2_muladd_prev, ALU_OP2_MULADD_PREV},
   {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
   {op2_interp_xy, ALU_OP2_INTERP_XY},
   {op2_interp_zw, ALU_OP2_INTERP_ZW},
   {op2_interp_x, ALU_OP2_INTERP_X},
   {op2_interp_z, ALU_OP2_INTERP_Z},
   {op0_store_flags, ALU_OP1_STORE_FLAGS},
   {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
   {op0_lds_1a, ALU_OP2_LDS_1A},
   {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
   {op0_lds_2a, ALU_OP2_LDS_2A},
   {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
   {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
   {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
   // {op 3 all left shift 6
   {op3_bfe_uint, ALU_OP3_BFE_UINT},
   {op3_bfe_int, ALU_OP3_BFE_INT},
   {op3_bfi_int, ALU_OP3_BFI_INT},
   {op3_fma, ALU_OP3_FMA},
   {op3_cndne_64, ALU_OP3_CNDNE_64},
   {op3_fma_64, ALU_OP3_FMA_64},
   {op3_lerp_uint, ALU_OP3_LERP_UINT},
   {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
   {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
   {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
   {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
   {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
   {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
   {op3_muladd, ALU_OP3_MULADD},
   {op3_muladd_m2, ALU_OP3_MULADD_M2},
   {op3_muladd_m4, ALU_OP3_MULADD_M4},
   {op3_muladd_d2, ALU_OP3_MULADD_D2},
   {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
   {op3_cnde, ALU_OP3_CNDE},
   {op3_cndgt, ALU_OP3_CNDGT},
   {op3_cndge, ALU_OP3_CNDGE},
   {op3_cnde_int, ALU_OP3_CNDE_INT},
   {op3_cndgt_int, ALU_OP3_CNDGT_INT},
   {op3_cndge_int, ALU_OP3_CNDGE_INT},
   {op3_mul_lit, ALU_OP3_MUL_LIT},
};
1429
/* Translation table from the IR GDS/DS opcodes (ESDOp) to the r600
 * fetch opcodes used when emitting GDS instructions.  DS_OP_INVALID
 * deliberately maps to 0. */
const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};
1480
1481 }
1482