1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31 
32 namespace r600 {
33 
FragmentShaderFromNir(const nir_shader & nir,r600_shader & sh,r600_pipe_shader_selector & sel,const r600_shader_key & key,enum chip_class chip_class)34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35                                              r600_shader& sh,
36                                              r600_pipe_shader_selector &sel,
37                                              const r600_shader_key &key,
38                                              enum chip_class chip_class):
39    ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
40    m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41    m_max_counted_color_exports(0),
42    m_two_sided_color(key.ps.color_two_side),
43    m_last_pixel_export(nullptr),
44    m_nir(nir),
45    m_reserved_registers(0),
46    m_frag_pos_index(0),
47    m_need_back_color(false),
48    m_front_face_loaded(false),
49    m_depth_exports(0),
50    m_enable_centroid_interpolators(false),
51    m_apply_sample_mask(key.ps.apply_sample_id_mask)
52 {
53    for (auto&  i: m_interpolator) {
54       i.enabled = false;
55       i.ij_index= 0;
56    }
57 
58    sh_info().rat_base = key.ps.nr_cbufs;
59    sh_info().atomic_base = key.ps.first_atomic_counter;
60 }
61 
do_process_inputs(nir_variable * input)62 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
63 {
64    sfn_log << SfnLog::io << "Parse input variable "
65            << input->name << " location:" <<  input->data.location
66            << " driver-loc:" << input->data.driver_location
67            << " interpolation:" << input->data.interpolation
68            << "\n";
69 
70    if (input->data.location == VARYING_SLOT_FACE) {
71       m_sv_values.set(es_face);
72       return true;
73    }
74 
75    unsigned name, sid;
76    auto semantic = r600_get_varying_semantic(input->data.location);
77    name = semantic.first;
78    sid = semantic.second;
79 
80    tgsi_semantic sname = static_cast<tgsi_semantic>(name);
81 
82    switch (sname) {
83    case TGSI_SEMANTIC_POSITION: {
84       m_sv_values.set(es_pos);
85       return true;
86    }
87    case TGSI_SEMANTIC_COLOR: {
88       m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
89       m_need_back_color = m_two_sided_color;
90       return true;
91    }
92    case TGSI_SEMANTIC_PRIMID:
93       sh_info().gs_prim_id_input = true;
94       sh_info().ps_prim_id_input = m_shaderio.inputs().size();
95       /* fallthrough */
96    case TGSI_SEMANTIC_FOG:
97    case TGSI_SEMANTIC_GENERIC:
98    case TGSI_SEMANTIC_TEXCOORD:
99    case TGSI_SEMANTIC_LAYER:
100    case TGSI_SEMANTIC_PCOORD:
101    case TGSI_SEMANTIC_VIEWPORT_INDEX:
102    case TGSI_SEMANTIC_CLIPDIST: {
103       if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
104          m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
105       return true;
106    }
107    default:
108       return false;
109    }
110 }
111 
scan_sysvalue_access(nir_instr * instr)112 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
113 {
114    switch (instr->type) {
115    case nir_instr_type_intrinsic: {
116       nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
117       switch (ii->intrinsic) {
118       case nir_intrinsic_load_front_face:
119          m_sv_values.set(es_face);
120          break;
121       case nir_intrinsic_load_sample_mask_in:
122          m_sv_values.set(es_sample_mask_in);
123          break;
124       case nir_intrinsic_load_sample_pos:
125          m_sv_values.set(es_sample_pos);
126          /* fallthrough */
127       case nir_intrinsic_load_sample_id:
128          m_sv_values.set(es_sample_id);
129          break;
130       case nir_intrinsic_interp_deref_at_centroid:
131          /* This is not a sysvalue, should go elsewhere */
132          m_enable_centroid_interpolators = true;
133          break;
134       default:
135          ;
136       }
137    }
138    default:
139       ;
140    }
141    return true;
142 }
143 
do_allocate_reserved_registers()144 bool FragmentShaderFromNir::do_allocate_reserved_registers()
145 {
146    assert(!m_reserved_registers);
147 
148    int face_reg_index = -1;
149    int sample_id_index = -1;
150    // enabled interpolators based on inputs
151    for (auto& i: m_shaderio.inputs()) {
152       int ij = i->ij_index();
153       if (ij >= 0) {
154          m_interpolator[ij].enabled = true;
155       }
156    }
157 
158    /* Lazy, enable both possible interpolators,
159     * TODO: check which ones are really needed */
160    if (m_enable_centroid_interpolators) {
161       m_interpolator[2].enabled = true; /* perspective */
162       m_interpolator[5].enabled = true; /* linear */
163    }
164 
165    // sort the varying inputs
166    m_shaderio.sort_varying_inputs();
167 
168    // handle interpolators
169    int num_baryc = 0;
170    for (int i = 0; i < 6; ++i) {
171       if (m_interpolator[i].enabled) {
172          sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
173 
174          m_interpolator[i].ij_index = num_baryc;
175 
176          unsigned sel = num_baryc / 2;
177          unsigned chan = 2 * (num_baryc % 2);
178 
179          auto ip_i = new GPRValue(sel, chan + 1);
180          ip_i->set_as_input();
181          m_interpolator[i].i.reset(ip_i);
182          inject_register(sel, chan + 1, m_interpolator[i].i, false);
183 
184          auto ip_j = new GPRValue(sel, chan);
185          ip_j->set_as_input();
186          m_interpolator[i].j.reset(ip_j);
187          inject_register(sel, chan, m_interpolator[i].j, false);
188 
189          ++num_baryc;
190       }
191    }
192    m_reserved_registers += (num_baryc + 1) >> 1;
193 
194    if (m_sv_values.test(es_pos)) {
195       m_frag_pos_index = m_reserved_registers++;
196       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
197    }
198 
199    // handle system values
200    if (m_sv_values.test(es_face) || m_need_back_color) {
201       face_reg_index = m_reserved_registers++;
202       auto ffr = new GPRValue(face_reg_index,0);
203       ffr->set_as_input();
204       m_front_face_reg.reset(ffr);
205       sfn_log << SfnLog::io << "Set front_face register to " <<  *m_front_face_reg << "\n";
206       inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
207 
208       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
209       load_front_face();
210    }
211 
212    if (m_sv_values.test(es_sample_mask_in)) {
213       if (face_reg_index < 0)
214          face_reg_index = m_reserved_registers++;
215 
216       auto smi = new GPRValue(face_reg_index,2);
217       smi->set_as_input();
218       m_sample_mask_reg.reset(smi);
219       sfn_log << SfnLog::io << "Set sample mask in register to " <<  *m_sample_mask_reg << "\n";
220       //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
221       sh_info().nsys_inputs = 1;
222       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
223    }
224 
225    if (m_sv_values.test(es_sample_id) ||
226        m_sv_values.test(es_sample_mask_in)) {
227       if (sample_id_index < 0)
228          sample_id_index = m_reserved_registers++;
229 
230       auto smi = new GPRValue(sample_id_index, 3);
231       smi->set_as_input();
232       m_sample_id_reg.reset(smi);
233       sfn_log << SfnLog::io << "Set sample id register to " <<  *m_sample_id_reg << "\n";
234       sh_info().nsys_inputs++;
235       m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
236    }
237 
238    // The back color handling is not emmited in the code, so we have
239    // to add the inputs here and later we also need to inject the code to set
240    // the right color
241    if (m_need_back_color) {
242       size_t ninputs = m_shaderio.inputs().size();
243       for (size_t k = 0; k < ninputs; ++k) {
244          ShaderInput& i = m_shaderio.input(k);
245 
246          if (i.name() != TGSI_SEMANTIC_COLOR)
247             continue;
248 
249          ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
250 
251          size_t next_pos = m_shaderio.size();
252          auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
253          m_shaderio.add_input(bcol);
254          col.set_back_color(next_pos);
255       }
256       m_shaderio.set_two_sided();
257    }
258 
259    m_shaderio.update_lds_pos();
260 
261    set_reserved_registers(m_reserved_registers);
262 
263    return true;
264 }
265 
emit_shader_start()266 void FragmentShaderFromNir::emit_shader_start()
267 {
268    if (m_sv_values.test(es_face))
269       load_front_face();
270 
271    if (m_sv_values.test(es_pos)) {
272       for (int i = 0; i < 4; ++i) {
273          auto v = new GPRValue(m_frag_pos_index, i);
274          v->set_as_input();
275          auto reg = PValue(v);
276          if (i == 3)
277             emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
278          m_frag_pos[i] = reg;
279       }
280    }
281 }
282 
do_emit_store_deref(const nir_variable * out_var,nir_intrinsic_instr * instr)283 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
284 {
285    if (out_var->data.location == FRAG_RESULT_COLOR)
286       return emit_export_pixel(out_var, instr, true);
287 
288    if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
289         out_var->data.location <= FRAG_RESULT_DATA7) ||
290        out_var->data.location == FRAG_RESULT_DEPTH ||
291        out_var->data.location == FRAG_RESULT_STENCIL ||
292        out_var->data.location == FRAG_RESULT_SAMPLE_MASK)
293       return emit_export_pixel(out_var, instr, false);
294 
295    sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
296               out_var->data.location << "(" << out_var->data.driver_location << ")\n";
297    return false;
298 }
299 
do_process_outputs(nir_variable * output)300 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
301 {
302    sfn_log << SfnLog::instr << "Parse output variable "
303            << output->name << "  @" << output->data.location
304            << "@dl:" << output->data.driver_location << "\n";
305 
306    ++sh_info().noutput;
307    r600_shader_io& io = sh_info().output[output->data.driver_location];
308    tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
309                                     &io.name, &io.sid);
310 
311    /* Check whether this code has become obsolete by the IO vectorization */
312    unsigned num_components = 4;
313    unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
314    if (vector_elements)
315            num_components = vector_elements;
316    unsigned component = output->data.location_frac;
317 
318    for (unsigned j = component; j < num_components + component; j++)
319       io.write_mask |= 1 << j;
320 
321    int loc = output->data.location;
322    if (loc == FRAG_RESULT_COLOR &&
323        (m_nir.info.outputs_written & (1ull << loc))) {
324            sh_info().fs_write_all = true;
325    }
326 
327    if (output->data.location == FRAG_RESULT_COLOR ||
328        (output->data.location >= FRAG_RESULT_DATA0 &&
329         output->data.location <= FRAG_RESULT_DATA7))  {
330       return true;
331    }
332    if (output->data.location == FRAG_RESULT_DEPTH ||
333        output->data.location == FRAG_RESULT_STENCIL ||
334        output->data.location == FRAG_RESULT_SAMPLE_MASK) {
335       io.write_mask = 15;
336       return true;
337    }
338 
339    return false;
340 }
341 
emit_load_sample_mask_in(nir_intrinsic_instr * instr)342 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
343 {
344    auto dest = from_nir(instr->dest, 0);
345    assert(m_sample_id_reg);
346    assert(m_sample_mask_reg);
347 
348    emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
349    emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
350    return true;
351 }
352 
emit_intrinsic_instruction_override(nir_intrinsic_instr * instr)353 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
354 {
355    switch (instr->intrinsic) {
356    case nir_intrinsic_load_sample_mask_in:
357       if (m_apply_sample_mask) {
358          return emit_load_sample_mask_in(instr);
359       } else
360          return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
361    case nir_intrinsic_load_sample_id:
362       return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
363    case nir_intrinsic_load_front_face:
364       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
365    case nir_intrinsic_interp_deref_at_sample:
366       return emit_interp_deref_at_sample(instr);
367    case nir_intrinsic_interp_deref_at_offset:
368       return emit_interp_deref_at_offset(instr);
369    case nir_intrinsic_interp_deref_at_centroid:
370       return emit_interp_deref_at_centroid(instr);
371    case nir_intrinsic_load_sample_pos:
372       return emit_load_sample_pos(instr);
373 
374    default:
375       return false;
376    }
377 }
378 
load_front_face()379 void FragmentShaderFromNir::load_front_face()
380 {
381    assert(m_front_face_reg);
382    if (m_front_face_loaded)
383       return;
384 
385    auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
386                                 Value::zero, {alu_write, alu_last_instr});
387    m_front_face_loaded = true;
388    emit_instruction(ir);
389 }
390 
emit_load_sample_pos(nir_intrinsic_instr * instr)391 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
392 {
393    GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
394    auto fetch = new FetchInstruction(vc_fetch,
395                                      no_index_offset,
396                                      fmt_32_32_32_32_float,
397                                      vtx_nf_scaled,
398                                      vtx_es_none,
399                                      m_sample_id_reg,
400                                      dest,
401                                      0,
402                                      false,
403                                      0xf,
404                                      R600_BUFFER_INFO_CONST_BUFFER,
405                                      0,
406                                      bim_none,
407                                      false,
408                                      false,
409                                      0,
410                                      0,
411                                      0,
412                                      PValue(),
413                                      {0,1,2,3});
414    fetch->set_flag(vtx_srf_mode);
415    emit_instruction(fetch);
416    return true;
417 }
418 
emit_interp_deref_at_sample(nir_intrinsic_instr * instr)419 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
420 {
421    GPRVector slope = get_temp_vec4();
422 
423    auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
424                                      from_nir_with_fetch_constant(instr->src[1], 0),
425                                      0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
426    fetch->set_flag(vtx_srf_mode);
427    emit_instruction(fetch);
428 
429    GPRVector grad = get_temp_vec4();
430    auto var = get_deref_location(instr->src[0]);
431    assert(var);
432 
433    auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
434    auto interpolator = m_interpolator[io.ij_index()];
435    PValue dummy(new GPRValue(interpolator.i->sel(), 7));
436 
437    GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
438 
439    auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
440    tex->set_dest_swizzle({0,1,7,7});
441    emit_instruction(tex);
442 
443    tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
444    tex->set_dest_swizzle({7,7,0,1});
445    emit_instruction(tex);
446 
447    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
448    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
449 
450    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
451    emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
452 
453    Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
454 
455    auto dst = vec_from_nir(instr->dest, 4);
456    int num_components = instr->dest.is_ssa ?
457                            instr->dest.ssa.num_components:
458                            instr->dest.reg.reg->num_components;
459 
460    load_interpolated(dst, io, ip, num_components, var->data.location_frac);
461 
462    return true;
463 }
464 
emit_interp_deref_at_offset(nir_intrinsic_instr * instr)465 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
466 {
467    int temp = allocate_temp_register();
468 
469    GPRVector help(temp, {0,1,2,3});
470 
471    auto var = get_deref_location(instr->src[0]);
472    assert(var);
473 
474    auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
475    auto interpolator = m_interpolator[io.ij_index()];
476    PValue dummy(new GPRValue(interpolator.i->sel(), 7));
477 
478    GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
479 
480    auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
481    getgradh->set_dest_swizzle({0,1,7,7});
482    getgradh->set_flag(TexInstruction::x_unnormalized);
483    getgradh->set_flag(TexInstruction::y_unnormalized);
484    getgradh->set_flag(TexInstruction::z_unnormalized);
485    getgradh->set_flag(TexInstruction::w_unnormalized);
486    emit_instruction(getgradh);
487 
488    auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
489    getgradv->set_dest_swizzle({7,7,0,1});
490    getgradv->set_flag(TexInstruction::x_unnormalized);
491    getgradv->set_flag(TexInstruction::y_unnormalized);
492    getgradv->set_flag(TexInstruction::z_unnormalized);
493    getgradv->set_flag(TexInstruction::w_unnormalized);
494    emit_instruction(getgradv);
495 
496    PValue ofs_x = from_nir(instr->src[1], 0);
497    PValue ofs_y = from_nir(instr->src[1], 1);
498    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
499    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
500    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
501    emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
502 
503    Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
504 
505    auto dst = vec_from_nir(instr->dest, 4);
506    load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
507                      var->data.location_frac);
508 
509    return true;
510 }
511 
emit_interp_deref_at_centroid(nir_intrinsic_instr * instr)512 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
513 {
514    auto var = get_deref_location(instr->src[0]);
515    assert(var);
516 
517    auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
518    io.set_uses_interpolate_at_centroid();
519 
520    int ij_index = io.ij_index() >= 3 ? 5 : 2;
521    assert (m_interpolator[ij_index].enabled);
522    auto ip = m_interpolator[ij_index];
523 
524    int num_components = nir_dest_num_components(instr->dest);
525 
526    auto dst = vec_from_nir(instr->dest, 4);
527    load_interpolated(dst, io, ip, num_components, var->data.location_frac);
528    return true;
529 }
530 
531 
do_emit_load_deref(const nir_variable * in_var,nir_intrinsic_instr * instr)532 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
533 {
534    if (in_var->data.location == VARYING_SLOT_POS) {
535       assert(instr->dest.is_ssa);
536 
537       for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
538          inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
539       }
540       return true;
541    }
542 
543    if (in_var->data.location == VARYING_SLOT_FACE)
544       return load_preloaded_value(instr->dest, 0, m_front_face_reg);
545 
546    // todo: replace io with ShaderInputVarying
547    auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
548    unsigned num_components  = 4;
549 
550 
551    if (instr->dest.is_ssa) {
552       num_components = instr->dest.ssa.num_components;
553    } else {
554       num_components = instr->dest.reg.reg->num_components;
555    }
556 
557    auto dst = vec_from_nir(instr->dest, 4);
558 
559    sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
560            << "].gpr=" << dst.sel() << "\n";
561 
562    io.set_gpr(dst.sel());
563 
564    auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
565 
566    load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
567 
568    /* These results are expected starting in slot x..*/
569    if (in_var->data.location_frac > 0) {
570       int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
571                                    instr->dest.reg.reg->num_components;
572       AluInstruction *ir = nullptr;
573       for (int i = 0; i < n; ++i) {
574          ir = new AluInstruction(op1_mov, dst[i],
575                                  dst[i + in_var->data.location_frac], {alu_write});
576          emit_instruction(ir);
577       }
578       if (ir)
579          ir->set_flag(alu_last_instr);
580    }
581 
582 
583    if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
584 
585       auto & color_input  = static_cast<ShaderInputColor&> (io);
586       auto& bgio = m_shaderio.input(color_input.back_color_input_index());
587 
588       bgio.set_gpr(allocate_temp_register());
589 
590       GPRVector bgcol(bgio.gpr(), {0,1,2,3});
591       load_interpolated(bgcol, bgio, ip, num_components, 0);
592 
593       load_front_face();
594 
595       AluInstruction *ir = nullptr;
596       for (unsigned i = 0; i < 4 ; ++i) {
597          ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
598          emit_instruction(ir);
599       }
600       if (ir)
601          ir->set_flag(alu_last_instr);
602    }
603 
604    return true;
605 }
606 
load_interpolated(GPRVector & dest,ShaderInput & io,const Interpolator & ip,int num_components,int start_comp)607 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
608                                               ShaderInput& io, const Interpolator &ip,
609                                               int num_components, int start_comp)
610 {
611    // replace io with ShaderInputVarying
612    if (io.interpolate() > 0) {
613 
614       sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
615 
616       if (num_components == 1) {
617          switch (start_comp) {
618          case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
619          case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
620          case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
621          case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
622          default:
623             assert(0);
624          }
625       }
626 
627       if (num_components == 2) {
628          switch (start_comp) {
629          case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
630          case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
631          case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
632                   load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
633          default:
634             assert(0);
635          }
636       }
637 
638       if (num_components == 3 && start_comp == 0)
639          return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
640                load_interpolated_one_comp(dest, io, ip, op2_interp_z);
641 
642       int full_write_mask = ((1 << num_components) - 1) << start_comp;
643 
644       bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
645       success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
646       return success;
647 
648    } else {
649       AluInstruction *ir = nullptr;
650       for (unsigned i = 0; i < 4 ; ++i) {
651          ir = new AluInstruction(op1_interp_load_p0, dest[i],
652                                  PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
653                                  EmitInstruction::write);
654          emit_instruction(ir);
655       }
656       ir->set_flag(alu_last_instr);
657    }
658    return true;
659 }
660 
load_interpolated_one_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op)661 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
662                                                        ShaderInput& io, const Interpolator& ip, EAluOp op)
663 {
664    for (unsigned i = 0; i < 2 ; ++i) {
665       int chan = i;
666       if (op == op2_interp_z)
667          chan += 2;
668 
669 
670       auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
671                                    PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
672                                    i == 0  ? EmitInstruction::write : EmitInstruction::last);
673       dest.pin_to_channel(chan);
674 
675       ir->set_bank_swizzle(alu_vec_210);
676       emit_instruction(ir);
677    }
678    return true;
679 }
680 
load_interpolated_two_comp(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,int writemask)681 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
682                                                        const Interpolator& ip, EAluOp op, int writemask)
683 {
684    AluInstruction *ir = nullptr;
685    for (unsigned i = 0; i < 4 ; ++i) {
686       ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
687                               (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
688       dest.pin_to_channel(i);
689       ir->set_bank_swizzle(alu_vec_210);
690       emit_instruction(ir);
691    }
692    ir->set_flag(alu_last_instr);
693    return true;
694 }
695 
load_interpolated_two_comp_for_one(GPRVector & dest,ShaderInput & io,const Interpolator & ip,EAluOp op,UNUSED int start,int comp)696 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
697                                                                ShaderInput& io, const Interpolator& ip,
698                                                                EAluOp op, UNUSED int start, int comp)
699 {
700    AluInstruction *ir = nullptr;
701    for (int i = 0; i <  4 ; ++i) {
702       ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
703                                    PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
704                                    i == comp ? EmitInstruction::write : EmitInstruction::empty);
705       ir->set_bank_swizzle(alu_vec_210);
706       dest.pin_to_channel(i);
707       emit_instruction(ir);
708    }
709    ir->set_flag(alu_last_instr);
710    return true;
711 }
712 
713 
emit_export_pixel(const nir_variable * out_var,nir_intrinsic_instr * instr,bool all_chanels)714 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_chanels)
715 {
716    int outputs = all_chanels ? m_max_color_exports : 1;
717 
718    std::array<uint32_t,4> swizzle;
719    unsigned writemask = nir_intrinsic_write_mask(instr);
720    switch (out_var->data.location) {
721    case FRAG_RESULT_DEPTH:
722       writemask = 1;
723       swizzle = {0,7,7,7};
724       break;
725    case FRAG_RESULT_STENCIL:
726       writemask = 2;
727       swizzle = {7,0,7,7};
728       break;
729    case FRAG_RESULT_SAMPLE_MASK:
730       writemask = 4;
731       swizzle = {7,7,0,7};
732       break;
733    default:
734       for (int i = 0; i < 4; ++i) {
735          swizzle[i] = (i < instr->num_components) ? i : 7;
736       }
737    }
738 
739    auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
740 
741    set_output(out_var->data.driver_location, value.sel());
742 
743    if (out_var->data.location == FRAG_RESULT_COLOR ||
744        (out_var->data.location >= FRAG_RESULT_DATA0 &&
745         out_var->data.location <= FRAG_RESULT_DATA7)) {
746       for (int k = 0 ; k < outputs; ++k) {
747 
748          unsigned location = out_var->data.driver_location + k - m_depth_exports;
749          if (location >= m_max_color_exports) {
750             sfn_log << SfnLog::io << "Pixel output " << location
751                     << " skipped  because  we have only "   << m_max_color_exports << "CBs\n";
752             continue;
753          }
754 
755          m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
756 
757          if (sh_info().ps_export_highest < location)
758             sh_info().ps_export_highest = location;
759 
760          sh_info().nr_ps_color_exports++;
761 
762          unsigned mask = (0xfu << (location * 4));
763          sh_info().ps_color_export_mask |= mask;
764 
765          emit_export_instruction(m_last_pixel_export);
766          ++m_max_counted_color_exports;
767       };
768    } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
769               out_var->data.location == FRAG_RESULT_STENCIL ||
770               out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
771       m_depth_exports++;
772       emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
773    } else {
774       return false;
775    }
776    return true;
777 }
778 
do_finalize()779 void FragmentShaderFromNir::do_finalize()
780 {
781    // update shader io info and set LDS etc.
782    sh_info().ninput = m_shaderio.inputs().size();
783 
784    sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
785    for (size_t i = 0; i < sh_info().ninput; ++i) {
786       int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
787                     m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
788       m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
789    }
790 
791    sh_info().two_side = m_shaderio.two_sided();
792    sh_info().nlds = m_shaderio.nlds();
793 
794    sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
795 
796    if (sh_info().fs_write_all) {
797       sh_info().nr_ps_max_color_exports = m_max_color_exports;
798    }
799 
800    if (!m_last_pixel_export) {
801       GPRVector v(0, {7,7,7,7});
802       m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
803       sh_info().nr_ps_color_exports++;
804       sh_info().ps_color_export_mask = 0xf;
805       emit_export_instruction(m_last_pixel_export);
806    }
807 
808    m_last_pixel_export->set_last();
809 
810    if (sh_info().fs_write_all)
811       sh_info().nr_ps_max_color_exports = 8;
812 }
813 
814 }
815