1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31 
32 namespace r600 {
33 
FragmentShaderFromNir(const nir_shader & nir,r600_shader & sh,r600_pipe_shader_selector & sel,const r600_shader_key & key,enum chip_class chip_class)34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35                                              r600_shader& sh,
36                                              r600_pipe_shader_selector &sel,
37                                              const r600_shader_key &key,
38                                              enum chip_class chip_class):
39    ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
40    m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41    m_max_counted_color_exports(0),
42    m_two_sided_color(key.ps.color_two_side),
43    m_last_pixel_export(nullptr),
44    m_nir(nir),
45    m_reserved_registers(0),
46    m_frag_pos_index(0),
47    m_need_back_color(false),
48    m_front_face_loaded(false),
49    m_depth_exports(0),
50    m_apply_sample_mask(key.ps.apply_sample_id_mask),
51    m_dual_source_blend(key.ps.dual_source_blend),
52    m_pos_input(nullptr)
53 {
54    for (auto&  i: m_interpolator) {
55       i.enabled = false;
56       i.ij_index= 0;
57    }
58 
59    sh_info().rat_base = key.ps.nr_cbufs;
60    sh_info().atomic_base = key.ps.first_atomic_counter;
61 }
62 
barycentric_ij_index(nir_intrinsic_instr * instr)63 unsigned barycentric_ij_index(nir_intrinsic_instr *instr)
64 {
65    unsigned index = 0;
66    switch (instr->intrinsic) {
67    case nir_intrinsic_load_barycentric_sample:
68       index = 0;
69       break;
70    case nir_intrinsic_load_barycentric_at_sample:
71    case nir_intrinsic_load_barycentric_at_offset:
72    case nir_intrinsic_load_barycentric_pixel:
73       index = 1;
74       break;
75    case nir_intrinsic_load_barycentric_centroid:
76       index = 2;
77       break;
78    default:
79       unreachable("Unknown interpolator intrinsic");
80    }
81 
82    switch (nir_intrinsic_interp_mode(instr)) {
83    case INTERP_MODE_NONE:
84    case INTERP_MODE_SMOOTH:
85    case INTERP_MODE_COLOR:
86       return index;
87    case INTERP_MODE_NOPERSPECTIVE:
88       return index + 3;
89    case INTERP_MODE_FLAT:
90    case INTERP_MODE_EXPLICIT:
91    default:
92       unreachable("unknown/unsupported mode for load_interpolated");
93    }
94    return 0;
95 }
96 
process_load_input(nir_intrinsic_instr * instr,bool interpolated)97 bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr,
98                                                bool interpolated)
99 {
100    sfn_log << SfnLog::io << "Parse " << instr->instr
101            << "\n";
102 
103    auto index = nir_src_as_const_value(instr->src[interpolated ? 1 : 0]);
104    assert(index);
105 
106    unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32;
107    auto semantic = r600_get_varying_semantic(location);
108    tgsi_semantic name = (tgsi_semantic)semantic.first;
109    unsigned sid = semantic.second;
110 
111 
112    if (location == VARYING_SLOT_POS) {
113       m_sv_values.set(es_pos);
114       m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
115                                                nir_intrinsic_component(instr),
116                                                nir_dest_num_components(instr->dest),
117                                                TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER);
118       m_shaderio.add_input(m_pos_input);
119       return true;
120    }
121 
122    if (location == VARYING_SLOT_FACE) {
123       m_sv_values.set(es_face);
124       return true;
125    }
126 
127 
128    tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
129    tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
130 
131    bool uses_interpol_at_centroid = false;
132 
133    if (interpolated) {
134 
135       glsl_interp_mode mode = INTERP_MODE_NONE;
136       auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
137       mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
138       switch (parent->intrinsic) {
139       case nir_intrinsic_load_barycentric_sample:
140          tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
141          break;
142       case nir_intrinsic_load_barycentric_at_sample:
143       case nir_intrinsic_load_barycentric_at_offset:
144       case nir_intrinsic_load_barycentric_pixel:
145          tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
146          break;
147       case nir_intrinsic_load_barycentric_centroid:
148          tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
149          uses_interpol_at_centroid = true;
150          break;
151       default:
152          std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
153                    << nir_intrinsic_infos[instr->intrinsic].name
154                    << " interpolator?\n";
155          assert(0);
156       }
157 
158       switch (mode) {
159       case INTERP_MODE_NONE:
160          if (name == TGSI_SEMANTIC_COLOR) {
161             tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
162             break;
163       }
164          FALLTHROUGH;
165       case INTERP_MODE_SMOOTH:
166          tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
167          break;
168       case INTERP_MODE_NOPERSPECTIVE:
169          tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
170          break;
171       case INTERP_MODE_FLAT:
172          break;
173       case INTERP_MODE_COLOR:
174          tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
175          break;
176       case INTERP_MODE_EXPLICIT:
177       default:
178          assert(0);
179       }
180 
181       m_interpolators_used.set(barycentric_ij_index(parent));
182 
183    }
184 
185    switch (name) {
186    case TGSI_SEMANTIC_COLOR: {
187       auto input = m_shaderio.find_varying(name, sid);
188       if (!input) {
189          m_shaderio.add_input(new ShaderInputColor(name, sid,
190                                                    nir_intrinsic_base(instr) + index->u32,
191                                                    nir_intrinsic_component(instr),
192                                                    nir_dest_num_components(instr->dest),
193                                                    tgsi_interpolate, tgsi_loc));
194       }  else {
195          if (uses_interpol_at_centroid)
196             input->set_uses_interpolate_at_centroid();
197 
198          auto varying = static_cast<ShaderInputVarying&>(*input);
199          varying.update_mask(nir_dest_num_components(instr->dest),
200                              nir_intrinsic_component(instr));
201       }
202 
203       m_need_back_color = m_two_sided_color;
204       return true;
205    }
206    case TGSI_SEMANTIC_PRIMID:
207       sh_info().gs_prim_id_input = true;
208       sh_info().ps_prim_id_input = m_shaderio.inputs().size();
209       FALLTHROUGH;
210    case TGSI_SEMANTIC_FOG:
211    case TGSI_SEMANTIC_GENERIC:
212    case TGSI_SEMANTIC_TEXCOORD:
213    case TGSI_SEMANTIC_LAYER:
214    case TGSI_SEMANTIC_PCOORD:
215    case TGSI_SEMANTIC_VIEWPORT_INDEX:
216    case TGSI_SEMANTIC_CLIPDIST: {
217       auto input = m_shaderio.find_varying(name, sid);
218       if (!input) {
219          m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
220                                                      nir_intrinsic_component(instr),
221                                                      nir_dest_num_components(instr->dest),
222                                                      tgsi_interpolate, tgsi_loc));
223       } else {
224          if (uses_interpol_at_centroid)
225             input->set_uses_interpolate_at_centroid();
226 
227          auto varying = static_cast<ShaderInputVarying&>(*input);
228          varying.update_mask(nir_dest_num_components(instr->dest),
229                              nir_intrinsic_component(instr));
230       }
231 
232       return true;
233    }
234    default:
235       return false;
236    }
237 }
238 
239 
scan_sysvalue_access(nir_instr * instr)240 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
241 {
242    switch (instr->type) {
243    case nir_instr_type_intrinsic: {
244       nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
245 
246       switch (ii->intrinsic) {
247       case nir_intrinsic_load_front_face:
248          m_sv_values.set(es_face);
249          break;
250       case nir_intrinsic_load_sample_mask_in:
251          m_sv_values.set(es_sample_mask_in);
252          break;
253       case nir_intrinsic_load_sample_pos:
254          m_sv_values.set(es_sample_pos);
255          FALLTHROUGH;
256       case nir_intrinsic_load_sample_id:
257          m_sv_values.set(es_sample_id);
258          break;
259       case nir_intrinsic_load_helper_invocation:
260          m_sv_values.set(es_helper_invocation);
261          sh_info().uses_helper_invocation = true;
262          break;
263       case nir_intrinsic_load_input:
264          return process_load_input(ii, false);
265       case nir_intrinsic_load_interpolated_input: {
266          return process_load_input(ii, true);
267       }
268       case nir_intrinsic_store_output:
269          return process_store_output(ii);
270 
271       default:
272          ;
273       }
274    }
275    default:
276       ;
277    }
278    return true;
279 }
280 
do_allocate_reserved_registers()281 bool FragmentShaderFromNir::do_allocate_reserved_registers()
282 {
283    assert(!m_reserved_registers);
284 
285    int face_reg_index = -1;
286    int sample_id_index = -1;
287    // enabled interpolators based on inputs
288    for (unsigned i = 0; i < s_max_interpolators; ++i) {
289       if (m_interpolators_used.test(i)) {
290          sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
291          m_interpolator[i].enabled = true;
292       }
293    }
294 
295    // sort the varying inputs
296    m_shaderio.sort_varying_inputs();
297 
298    // handle interpolators
299    int num_baryc = 0;
300    for (int i = 0; i < 6; ++i) {
301       if (m_interpolator[i].enabled) {
302          sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
303 
304          m_interpolator[i].ij_index = num_baryc;
305 
306          unsigned sel = num_baryc / 2;
307          unsigned chan = 2 * (num_baryc % 2);
308 
309          auto ip_i = new GPRValue(sel, chan + 1);
310          ip_i->set_as_input();
311          m_interpolator[i].i.reset(ip_i);
312          inject_register(sel, chan + 1, m_interpolator[i].i, false);
313 
314          auto ip_j = new GPRValue(sel, chan);
315          ip_j->set_as_input();
316          m_interpolator[i].j.reset(ip_j);
317          inject_register(sel, chan, m_interpolator[i].j, false);
318 
319          ++num_baryc;
320       }
321    }
322    m_reserved_registers += (num_baryc + 1) >> 1;
323 
324    if (m_sv_values.test(es_pos)) {
325       m_frag_pos_index = m_reserved_registers++;
326       assert(m_pos_input);
327       m_pos_input->set_gpr(m_frag_pos_index);
328    }
329 
330    // handle system values
331    if (m_sv_values.test(es_face) || m_need_back_color) {
332       face_reg_index = m_reserved_registers++;
333       m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0);
334       m_front_face_reg->set_as_input();
335       sfn_log << SfnLog::io << "Set front_face register to " <<  *m_front_face_reg << "\n";
336       inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false);
337 
338       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
339       load_front_face();
340    }
341 
342    if (m_sv_values.test(es_sample_mask_in)) {
343       if (face_reg_index < 0)
344          face_reg_index = m_reserved_registers++;
345 
346       m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2);
347       m_sample_mask_reg->set_as_input();
348       sfn_log << SfnLog::io << "Set sample mask in register to " <<  *m_sample_mask_reg << "\n";
349       sh_info().nsys_inputs = 1;
350       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
351    }
352 
353    if (m_sv_values.test(es_sample_id) ||
354        m_sv_values.test(es_sample_mask_in)) {
355       if (sample_id_index < 0)
356          sample_id_index = m_reserved_registers++;
357 
358       m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
359       m_sample_id_reg->set_as_input();
360       sfn_log << SfnLog::io << "Set sample id register to " <<  *m_sample_id_reg << "\n";
361       sh_info().nsys_inputs++;
362       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
363    }
364 
365    // The back color handling is not emmited in the code, so we have
366    // to add the inputs here and later we also need to inject the code to set
367    // the right color
368    if (m_need_back_color) {
369       size_t ninputs = m_shaderio.inputs().size();
370       for (size_t k = 0; k < ninputs; ++k) {
371          ShaderInput& i = m_shaderio.input(k);
372 
373          if (i.name() != TGSI_SEMANTIC_COLOR)
374             continue;
375 
376          ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
377 
378          size_t next_pos = m_shaderio.size();
379          auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
380          m_shaderio.add_input(bcol);
381          col.set_back_color(next_pos);
382       }
383       m_shaderio.set_two_sided();
384    }
385 
386    m_shaderio.update_lds_pos();
387 
388    set_reserved_registers(m_reserved_registers);
389 
390    return true;
391 }
392 
emit_shader_start()393 void FragmentShaderFromNir::emit_shader_start()
394 {
395    if (m_sv_values.test(es_face))
396       load_front_face();
397 
398    if (m_sv_values.test(es_pos)) {
399       for (int i = 0; i < 4; ++i) {
400          auto v = new GPRValue(m_frag_pos_index, i);
401          v->set_as_input();
402          auto reg = PValue(v);
403          m_frag_pos[i] = reg;
404       }
405    }
406 
407    if (m_sv_values.test(es_helper_invocation)) {
408       m_helper_invocation = get_temp_register();
409       auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
410       emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
411       GPRVector dst({dummy, dummy, dummy, dummy});
412       std::array<int,4> swz = {7,7,7,7};
413       dst.set_reg_i(m_helper_invocation->chan(), m_helper_invocation);
414       swz[m_helper_invocation->chan()] = 4;
415 
416       auto vtx = new FetchInstruction(dst, m_helper_invocation,
417                                       R600_BUFFER_INFO_CONST_BUFFER, bim_none);
418       vtx->set_flag(vtx_vpm);
419       vtx->set_flag(vtx_use_tc);
420       vtx->set_dest_swizzle(swz);
421       emit_instruction(vtx);
422    }
423 }
424 
process_store_output(nir_intrinsic_instr * instr)425 bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
426 {
427 
428    auto semantic = nir_intrinsic_io_semantics(instr);
429    unsigned driver_loc = nir_intrinsic_base(instr);
430 
431    if (sh_info().noutput <= driver_loc)
432       sh_info().noutput = driver_loc + 1;
433 
434    r600_shader_io& io = sh_info().output[driver_loc];
435    tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(semantic.location),
436                                     &io.name, &io.sid);
437 
438    unsigned component = nir_intrinsic_component(instr);
439    io.write_mask |= nir_intrinsic_write_mask(instr) << component;
440 
441    if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
442       sh_info().fs_write_all = true;
443    }
444 
445    if (semantic.location == FRAG_RESULT_COLOR ||
446        (semantic.location >= FRAG_RESULT_DATA0 &&
447         semantic.location <= FRAG_RESULT_DATA7))  {
448       ++m_max_counted_color_exports;
449 
450       /* Hack: force dual source output handling if one color output has a
451        * dual_source_blend_index > 0 */
452       if (semantic.location == FRAG_RESULT_COLOR &&
453           semantic.dual_source_blend_index > 0)
454          m_dual_source_blend = true;
455 
456       if (m_max_counted_color_exports > 1)
457          sh_info().fs_write_all = false;
458       return true;
459    }
460 
461    if (semantic.location == FRAG_RESULT_DEPTH ||
462        semantic.location == FRAG_RESULT_STENCIL ||
463        semantic.location == FRAG_RESULT_SAMPLE_MASK) {
464       io.write_mask = 15;
465       return true;
466    }
467 
468    return false;
469 
470 
471 }
472 
emit_load_sample_mask_in(nir_intrinsic_instr * instr)473 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
474 {
475    auto dest = from_nir(instr->dest, 0);
476    assert(m_sample_id_reg);
477    assert(m_sample_mask_reg);
478 
479    emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
480    emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
481    return true;
482 }
483 
emit_intrinsic_instruction_override(nir_intrinsic_instr * instr)484 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
485 {
486    switch (instr->intrinsic) {
487    case nir_intrinsic_load_sample_mask_in:
488       if (m_apply_sample_mask) {
489          return emit_load_sample_mask_in(instr);
490       } else
491          return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
492    case nir_intrinsic_load_sample_id:
493       return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
494    case nir_intrinsic_load_front_face:
495       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
496    case nir_intrinsic_load_sample_pos:
497       return emit_load_sample_pos(instr);
498    case nir_intrinsic_load_helper_invocation:
499       return load_preloaded_value(instr->dest, 0, m_helper_invocation);
500    case nir_intrinsic_load_input:
501       return emit_load_input(instr);
502    case nir_intrinsic_load_barycentric_sample:
503    case nir_intrinsic_load_barycentric_pixel:
504    case nir_intrinsic_load_barycentric_centroid:  {
505       unsigned ij = barycentric_ij_index(instr);
506       return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
507             load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
508    }
509    case nir_intrinsic_load_barycentric_at_offset:
510          return load_barycentric_at_offset(instr);
511    case nir_intrinsic_load_barycentric_at_sample:
512       return load_barycentric_at_sample(instr);
513 
514    case nir_intrinsic_load_interpolated_input: {
515       return emit_load_interpolated_input(instr);
516    }
517    case nir_intrinsic_store_output:
518       return emit_store_output(instr);
519 
520    default:
521       return false;
522    }
523 }
524 
emit_store_output(nir_intrinsic_instr * instr)525 bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
526 {
527    auto location = nir_intrinsic_io_semantics(instr).location;
528 
529    if (location == FRAG_RESULT_COLOR)
530       return emit_export_pixel(instr, m_dual_source_blend ? 1 : m_max_color_exports);
531 
532    if ((location >= FRAG_RESULT_DATA0 &&
533         location <= FRAG_RESULT_DATA7) ||
534        location == FRAG_RESULT_DEPTH ||
535        location == FRAG_RESULT_STENCIL ||
536        location == FRAG_RESULT_SAMPLE_MASK)
537       return emit_export_pixel(instr, 1);
538 
539    sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n";
540    return false;
541 
542 }
543 
emit_load_interpolated_input(nir_intrinsic_instr * instr)544 bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
545 {
546    unsigned loc = nir_intrinsic_io_semantics(instr).location;
547    switch (loc) {
548    case VARYING_SLOT_POS:
549       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
550          load_preloaded_value(instr->dest, i, m_frag_pos[i]);
551       }
552       return true;
553    case VARYING_SLOT_FACE:
554       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
555    default:
556       ;
557    }
558 
559    auto param = nir_src_as_const_value(instr->src[1]);
560    assert(param && "Indirect PS inputs not (yet) supported");
561 
562    auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
563    auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
564 
565    io.set_gpr(dst.sel());
566 
567    Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
568 
569 
570    if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
571                           nir_intrinsic_component(instr)))
572       return false;
573 
574    if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
575 
576       auto & color_input  = static_cast<ShaderInputColor&> (io);
577       auto& bgio = m_shaderio.input(color_input.back_color_input_index());
578 
579       GPRVector bgcol = get_temp_vec4();
580       bgio.set_gpr(bgcol.sel());
581       load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
582 
583       load_front_face();
584 
585       AluInstruction *ir = nullptr;
586       for (unsigned i = 0; i < 4 ; ++i) {
587          ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
588          emit_instruction(ir);
589       }
590       if (ir)
591          ir->set_flag(alu_last_instr);
592    }
593 
594 
595    AluInstruction *ir = nullptr;
596    if (nir_intrinsic_component(instr) != 0) {
597       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
598          ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
599          emit_instruction(ir);
600       }
601       if (ir)
602          ir->set_flag(alu_last_instr);
603    }
604 
605    return true;
606 }
607 
load_barycentric_at_offset(nir_intrinsic_instr * instr)608 bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
609 {
610    auto interpolator = m_interpolator[barycentric_ij_index(instr)];
611    PValue dummy(new GPRValue(interpolator.i->sel(), 0));
612 
613    GPRVector help = get_temp_vec4();
614    GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
615 
616    auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
617    getgradh->set_dest_swizzle({0,1,7,7});
618    getgradh->set_flag(TexInstruction::x_unnormalized);
619    getgradh->set_flag(TexInstruction::y_unnormalized);
620    getgradh->set_flag(TexInstruction::z_unnormalized);
621    getgradh->set_flag(TexInstruction::w_unnormalized);
622    getgradh->set_flag(TexInstruction::grad_fine);
623    emit_instruction(getgradh);
624 
625    auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
626    getgradv->set_dest_swizzle({7,7,0,1});
627    getgradv->set_flag(TexInstruction::x_unnormalized);
628    getgradv->set_flag(TexInstruction::y_unnormalized);
629    getgradv->set_flag(TexInstruction::z_unnormalized);
630    getgradv->set_flag(TexInstruction::w_unnormalized);
631    getgradv->set_flag(TexInstruction::grad_fine);
632    emit_instruction(getgradv);
633 
634    PValue ofs_x = from_nir(instr->src[0], 0);
635    PValue ofs_y = from_nir(instr->src[0], 1);
636    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
637    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
638    emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
639    emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
640 
641    return true;
642 }
643 
load_barycentric_at_sample(nir_intrinsic_instr * instr)644 bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
645 {
646    GPRVector slope = get_temp_vec4();
647 
648    auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
649                                      from_nir_with_fetch_constant(instr->src[0], 0),
650                                      0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
651    fetch->set_flag(vtx_srf_mode);
652    emit_instruction(fetch);
653 
654    GPRVector grad = get_temp_vec4();
655 
656    auto interpolator = m_interpolator[barycentric_ij_index(instr)];
657    assert(interpolator.enabled);
658    PValue dummy(new GPRValue(interpolator.i->sel(), 0));
659 
660    GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
661 
662    auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
663    tex->set_flag(TexInstruction::grad_fine);
664    tex->set_flag(TexInstruction::x_unnormalized);
665    tex->set_flag(TexInstruction::y_unnormalized);
666    tex->set_flag(TexInstruction::z_unnormalized);
667    tex->set_flag(TexInstruction::w_unnormalized);
668    tex->set_dest_swizzle({0,1,7,7});
669    emit_instruction(tex);
670 
671    tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
672    tex->set_flag(TexInstruction::x_unnormalized);
673    tex->set_flag(TexInstruction::y_unnormalized);
674    tex->set_flag(TexInstruction::z_unnormalized);
675    tex->set_flag(TexInstruction::w_unnormalized);
676    tex->set_flag(TexInstruction::grad_fine);
677    tex->set_dest_swizzle({7,7,0,1});
678    emit_instruction(tex);
679 
680    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
681    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
682 
683    emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write}));
684    emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr}));
685 
686    return true;
687 }
688 
emit_load_input(nir_intrinsic_instr * instr)689 bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr)
690 {
691    unsigned loc = nir_intrinsic_io_semantics(instr).location;
692    auto param = nir_src_as_const_value(instr->src[0]);
693    assert(param && "Indirect PS inputs not (yet) supported");
694 
695    auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
696 
697    assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
698 
699    unsigned num_components = nir_dest_num_components(instr->dest);
700 
701    switch (loc) {
702    case VARYING_SLOT_POS:
703       for (unsigned i = 0; i < num_components; ++i) {
704          load_preloaded_value(instr->dest, i, m_frag_pos[i]);
705       }
706       return true;
707    case VARYING_SLOT_FACE:
708       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
709    default:
710       ;
711    }
712 
713    auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
714 
715    AluInstruction *ir = nullptr;
716    for (unsigned i = 0; i < 4 ; ++i) {
717       ir = new AluInstruction(op1_interp_load_p0, dst[i],
718                               PValue(new InlineConstValue(ALU_SRC_PARAM_BASE +
719                                                           io.lds_pos(), i)),
720                               EmitInstruction::write);
721       emit_instruction(ir);
722    }
723    ir->set_flag(alu_last_instr);
724 
725    /* TODO: back color */
726    if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
727       Interpolator ip = {false, 0, NULL, NULL};
728 
729       auto & color_input  = static_cast<ShaderInputColor&> (io);
730       auto& bgio = m_shaderio.input(color_input.back_color_input_index());
731 
732       GPRVector bgcol = get_temp_vec4();
733       bgio.set_gpr(bgcol.sel());
734       load_interpolated(bgcol, bgio, ip, num_components, 0);
735 
736       load_front_face();
737 
738       AluInstruction *ir = nullptr;
739       for (unsigned i = 0; i < 4 ; ++i) {
740          ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
741          emit_instruction(ir);
742       }
743       if (ir)
744          ir->set_flag(alu_last_instr);
745    }
746 
747    if (nir_intrinsic_component(instr) != 0) {
748       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
749          ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
750          emit_instruction(ir);
751       }
752       if (ir)
753          ir->set_flag(alu_last_instr);
754    }
755 
756 
757    return true;
758 }
759 
load_front_face()760 void FragmentShaderFromNir::load_front_face()
761 {
762    assert(m_front_face_reg);
763    if (m_front_face_loaded)
764       return;
765 
766    auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
767                                 Value::zero, {alu_write, alu_last_instr});
768    m_front_face_loaded = true;
769    emit_instruction(ir);
770 }
771 
emit_load_sample_pos(nir_intrinsic_instr * instr)772 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
773 {
774    GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
775    auto fetch = new FetchInstruction(vc_fetch,
776                                      no_index_offset,
777                                      fmt_32_32_32_32_float,
778                                      vtx_nf_scaled,
779                                      vtx_es_none,
780                                      m_sample_id_reg,
781                                      dest,
782                                      0,
783                                      false,
784                                      0xf,
785                                      R600_BUFFER_INFO_CONST_BUFFER,
786                                      0,
787                                      bim_none,
788                                      false,
789                                      false,
790                                      0,
791                                      0,
792                                      0,
793                                      PValue(),
794                                      {0,1,2,3});
795    fetch->set_flag(vtx_srf_mode);
796    emit_instruction(fetch);
797    return true;
798 }
799 
load_interpolated(GPRVector & dest,ShaderInput & io,const Interpolator & ip,int num_components,int start_comp)800 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
801                                               ShaderInput& io, const Interpolator &ip,
802                                               int num_components, int start_comp)
803 {
804    // replace io with ShaderInputVarying
805    if (io.interpolate() > 0) {
806 
807       sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i <<  ")" << "\n";
808 
809       if (num_components == 1) {
810          switch (start_comp) {
811          case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
812          case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
813          case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
814          case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
815          default:
816             assert(0);
817          }
818       }
819 
820       if (num_components == 2) {
821          switch (start_comp) {
822          case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
823          case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
824          case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
825                   load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
826          default:
827             assert(0);
828          }
829       }
830 
831       if (num_components == 3 && start_comp == 0)
832          return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
833                load_interpolated_one_comp(dest, io, ip, op2_interp_z);
834 
835       int full_write_mask = ((1 << num_components) - 1) << start_comp;
836 
837       bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
838       success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
839       return success;
840 
841    } else {
842       AluInstruction *ir = nullptr;
843       for (unsigned i = 0; i < 4 ; ++i) {
844          ir = new AluInstruction(op1_interp_load_p0, dest[i],
845                                  PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
846                                  EmitInstruction::write);
847          emit_instruction(ir);
848       }
849       ir->set_flag(alu_last_instr);
850    }
851    return true;
852 }
853 
load_interpolated_one_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op)854 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
855                                                        ShaderInput& io, const Interpolator& ip, EAluOp op)
856 {
857    for (unsigned i = 0; i < 2 ; ++i) {
858       int chan = i;
859       if (op == op2_interp_z)
860          chan += 2;
861 
862 
863       auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
864                                    PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
865                                    i == 0  ? EmitInstruction::write : EmitInstruction::last);
866       dest.pin_to_channel(chan);
867 
868       ir->set_bank_swizzle(alu_vec_210);
869       emit_instruction(ir);
870    }
871    return true;
872 }
873 
load_interpolated_two_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,int writemask)874 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
875                                                        const Interpolator& ip, EAluOp op, int writemask)
876 {
877    AluInstruction *ir = nullptr;
878    assert(ip.j);
879    assert(ip.i);
880    for (unsigned i = 0; i < 4 ; ++i) {
881       ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
882                               (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
883       dest.pin_to_channel(i);
884       ir->set_bank_swizzle(alu_vec_210);
885       emit_instruction(ir);
886    }
887    ir->set_flag(alu_last_instr);
888    return true;
889 }
890 
load_interpolated_two_comp_for_one(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,UNUSED int start,int comp)891 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
892                                                                ShaderInput& io, const Interpolator& ip,
893                                                                EAluOp op, UNUSED int start, int comp)
894 {
895    AluInstruction *ir = nullptr;
896    for (int i = 0; i <  4 ; ++i) {
897       ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
898                                    PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
899                                    i == comp ? EmitInstruction::write : EmitInstruction::empty);
900       ir->set_bank_swizzle(alu_vec_210);
901       dest.pin_to_channel(i);
902       emit_instruction(ir);
903    }
904    ir->set_flag(alu_last_instr);
905    return true;
906 }
907 
908 
emit_export_pixel(nir_intrinsic_instr * instr,int outputs)909 bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs)
910 {
911    std::array<uint32_t,4> swizzle;
912    unsigned writemask = nir_intrinsic_write_mask(instr);
913    auto semantics = nir_intrinsic_io_semantics(instr);
914    unsigned driver_location = nir_intrinsic_base(instr);
915 
916    switch (semantics.location) {
917    case FRAG_RESULT_DEPTH:
918       writemask = 1;
919       swizzle = {0,7,7,7};
920       break;
921    case FRAG_RESULT_STENCIL:
922       writemask = 2;
923       swizzle = {7,0,7,7};
924       break;
925    case FRAG_RESULT_SAMPLE_MASK:
926       writemask = 4;
927       swizzle = {7,7,0,7};
928       break;
929    default:
930       for (int i = 0; i < 4; ++i) {
931          swizzle[i] = (i < instr->num_components) ? i : 7;
932       }
933    }
934 
935    auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle);
936 
937    set_output(driver_location, value.sel());
938 
939    if (semantics.location == FRAG_RESULT_COLOR ||
940        (semantics.location >= FRAG_RESULT_DATA0 &&
941         semantics.location <= FRAG_RESULT_DATA7)) {
942       for (int k = 0 ; k < outputs; ++k) {
943 
944          unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
945                              ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
946 
947          sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
948 
949          if (location >= m_max_color_exports) {
950             sfn_log << SfnLog::io << "Pixel output loc:" << location
951                     << " dl:" << driver_location
952                     << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
953             continue;
954          }
955 
956          m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
957 
958          if (sh_info().ps_export_highest < location)
959             sh_info().ps_export_highest = location;
960 
961          sh_info().nr_ps_color_exports++;
962 
963          unsigned mask = (0xfu << (location * 4));
964          sh_info().ps_color_export_mask |= mask;
965 
966          emit_export_instruction(m_last_pixel_export);
967       };
968    } else if (semantics.location == FRAG_RESULT_DEPTH ||
969               semantics.location == FRAG_RESULT_STENCIL ||
970               semantics.location == FRAG_RESULT_SAMPLE_MASK) {
971       m_depth_exports++;
972       emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
973    } else {
974       return false;
975    }
976    return true;
977 }
978 
979 
emit_export_pixel(const nir_variable * out_var,nir_intrinsic_instr * instr,int outputs)980 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
981 {
982    std::array<uint32_t,4> swizzle;
983    unsigned writemask = nir_intrinsic_write_mask(instr);
984    switch (out_var->data.location) {
985    case FRAG_RESULT_DEPTH:
986       writemask = 1;
987       swizzle = {0,7,7,7};
988       break;
989    case FRAG_RESULT_STENCIL:
990       writemask = 2;
991       swizzle = {7,0,7,7};
992       break;
993    case FRAG_RESULT_SAMPLE_MASK:
994       writemask = 4;
995       swizzle = {7,7,0,7};
996       break;
997    default:
998       for (int i = 0; i < 4; ++i) {
999          swizzle[i] = (i < instr->num_components) ? i : 7;
1000       }
1001    }
1002 
1003    auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
1004 
1005    set_output(out_var->data.driver_location, value.sel());
1006 
1007    if (out_var->data.location == FRAG_RESULT_COLOR ||
1008        (out_var->data.location >= FRAG_RESULT_DATA0 &&
1009         out_var->data.location <= FRAG_RESULT_DATA7)) {
1010       for (int k = 0 ; k < outputs; ++k) {
1011 
1012          unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR)
1013                              ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;
1014 
1015          sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";
1016 
1017          if (location >= m_max_color_exports) {
1018             sfn_log << SfnLog::io << "Pixel output loc:" << location
1019                     << " dl:" << out_var->data.location
1020                     << " skipped  because  we have only "   << m_max_color_exports << " CBs\n";
1021             continue;
1022          }
1023 
1024          m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
1025 
1026          if (sh_info().ps_export_highest < location)
1027             sh_info().ps_export_highest = location;
1028 
1029          sh_info().nr_ps_color_exports++;
1030 
1031          unsigned mask = (0xfu << (location * 4));
1032          sh_info().ps_color_export_mask |= mask;
1033 
1034          emit_export_instruction(m_last_pixel_export);
1035       };
1036    } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
1037               out_var->data.location == FRAG_RESULT_STENCIL ||
1038               out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
1039       m_depth_exports++;
1040       emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
1041    } else {
1042       return false;
1043    }
1044    return true;
1045 }
1046 
do_finalize()1047 void FragmentShaderFromNir::do_finalize()
1048 {
1049    // update shader io info and set LDS etc.
1050    sh_info().ninput = m_shaderio.inputs().size();
1051 
1052    sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
1053    for (size_t i = 0; i < sh_info().ninput; ++i) {
1054       ShaderInput& input = m_shaderio.input(i);
1055       int ij_idx = (input.ij_index() < 6 &&
1056                     input.ij_index() >= 0) ? input.ij_index() : 0;
1057       input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
1058    }
1059 
1060    sh_info().two_side = m_shaderio.two_sided();
1061    sh_info().nlds = m_shaderio.nlds();
1062 
1063    if (!m_last_pixel_export) {
1064       GPRVector v(0, {7,7,7,7});
1065       m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
1066       sh_info().nr_ps_color_exports++;
1067       sh_info().ps_color_export_mask = 0xf;
1068       emit_export_instruction(m_last_pixel_export);
1069    }
1070 
1071    m_last_pixel_export->set_last();
1072 }
1073 
1074 }
1075