1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_shader_geometry.h"
28 #include "sfn_instruction_misc.h"
29 #include "sfn_instruction_fetch.h"
30 #include "sfn_shaderio.h"
31 
32 namespace r600 {
33 
GeometryShaderFromNir(r600_pipe_shader * sh,r600_pipe_shader_selector & sel,const r600_shader_key & key,enum chip_class chip_class)34 GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh,
35                                              r600_pipe_shader_selector &sel,
36                                              const r600_shader_key &key,
37                                              enum chip_class chip_class):
38    VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader,
39                sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter),
40    m_pipe_shader(sh),
41    m_so_info(&sel.so),
42    m_first_vertex_emitted(false),
43    m_offset(0),
44    m_next_input_ring_offset(0),
45    m_key(key),
46    m_clip_dist_mask(0),
47    m_cur_ring_output(0),
48    m_gs_tri_strip_adj_fix(false),
49    m_input_mask(0)
50 {
51    sh_info().atomic_base = key.gs.first_atomic_counter;
52 }
53 
emit_store(nir_intrinsic_instr * instr)54 bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr)
55 {
56    auto location = nir_intrinsic_io_semantics(instr).location;
57    auto index = nir_src_as_const_value(instr->src[1]);
58    assert(index);
59    auto driver_location = nir_intrinsic_base(instr) + index->u32;
60 
61    uint32_t write_mask = nir_intrinsic_write_mask(instr);
62    GPRVector::Swizzle swz = swizzle_from_mask(write_mask);
63 
64    auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true);
65 
66    sh_info().output[driver_location].write_mask = write_mask;
67 
68    auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value,
69                                       4 * driver_location,
70                                       instr->num_components, m_export_base[0]);
71    streamout_data[location] = ir;
72 
73    return true;
74 }
75 
scan_sysvalue_access(UNUSED nir_instr * instr)76 bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
77 {
78    if (instr->type != nir_instr_type_intrinsic)
79       return true;
80 
81    nir_intrinsic_instr *ii =  nir_instr_as_intrinsic(instr);
82 
83    switch (ii->intrinsic) {
84    case nir_intrinsic_store_output:
85       return process_store_output(ii);
86    case nir_intrinsic_load_input:
87    case nir_intrinsic_load_per_vertex_input:
88       return process_load_input(ii);
89    default:
90       return true;
91    }
92 }
93 
process_store_output(nir_intrinsic_instr * instr)94 bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr)
95 {
96    auto location = nir_intrinsic_io_semantics(instr).location;
97    auto index = nir_src_as_const_value(instr->src[1]);
98    assert(index);
99 
100    auto driver_location = nir_intrinsic_base(instr) + index->u32;
101 
102    if (location == VARYING_SLOT_COL0 ||
103        location == VARYING_SLOT_COL1 ||
104        (location >= VARYING_SLOT_VAR0 &&
105        location <= VARYING_SLOT_VAR31) ||
106        (location >= VARYING_SLOT_TEX0 &&
107        location <= VARYING_SLOT_TEX7) ||
108        location == VARYING_SLOT_BFC0 ||
109        location == VARYING_SLOT_BFC1 ||
110        location == VARYING_SLOT_PNTC ||
111        location == VARYING_SLOT_CLIP_VERTEX ||
112        location == VARYING_SLOT_CLIP_DIST0 ||
113        location == VARYING_SLOT_CLIP_DIST1 ||
114        location == VARYING_SLOT_PRIMITIVE_ID ||
115        location == VARYING_SLOT_POS ||
116        location == VARYING_SLOT_PSIZ ||
117        location == VARYING_SLOT_LAYER ||
118        location == VARYING_SLOT_VIEWPORT ||
119        location == VARYING_SLOT_FOGC) {
120       r600_shader_io& io = sh_info().output[driver_location];
121 
122       auto semantic = r600_get_varying_semantic(location);
123       io.name = semantic.first;
124       io.sid = semantic.second;
125 
126       evaluate_spi_sid(io);
127 
128       if (sh_info().noutput <= driver_location)
129          sh_info().noutput = driver_location + 1;
130 
131       if (location == VARYING_SLOT_CLIP_DIST0 ||
132           location == VARYING_SLOT_CLIP_DIST1) {
133          m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
134       }
135 
136       if (location == VARYING_SLOT_VIEWPORT) {
137          sh_info().vs_out_viewport = 1;
138          sh_info().vs_out_misc_write = 1;
139       }
140       return true;
141    }
142    return false;
143 }
144 
process_load_input(nir_intrinsic_instr * instr)145 bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr)
146 {
147    auto location = nir_intrinsic_io_semantics(instr).location;
148    auto index = nir_src_as_const_value(instr->src[1]);
149    assert(index);
150 
151    auto driver_location = nir_intrinsic_base(instr) + index->u32;
152 
153    if (location == VARYING_SLOT_POS ||
154        location == VARYING_SLOT_PSIZ ||
155        location == VARYING_SLOT_FOGC ||
156        location == VARYING_SLOT_CLIP_VERTEX ||
157        location == VARYING_SLOT_CLIP_DIST0 ||
158        location == VARYING_SLOT_CLIP_DIST1 ||
159        location == VARYING_SLOT_COL0 ||
160        location == VARYING_SLOT_COL1 ||
161        location == VARYING_SLOT_BFC0 ||
162        location == VARYING_SLOT_BFC1 ||
163        location == VARYING_SLOT_PNTC ||
164        (location >= VARYING_SLOT_VAR0 &&
165         location <= VARYING_SLOT_VAR31) ||
166        (location >= VARYING_SLOT_TEX0 &&
167        location <= VARYING_SLOT_TEX7)) {
168 
169       uint64_t bit = 1ull << location;
170       if (!(bit & m_input_mask)) {
171          r600_shader_io& io = sh_info().input[driver_location];
172          auto semantic = r600_get_varying_semantic(location);
173          io.name = semantic.first;
174          io.sid = semantic.second;
175 
176          io.ring_offset = 16 * driver_location;
177          ++sh_info().ninput;
178          m_next_input_ring_offset += 16;
179          m_input_mask |= bit;
180       }
181       return true;
182    }
183    return false;
184 }
185 
do_allocate_reserved_registers()186 bool GeometryShaderFromNir::do_allocate_reserved_registers()
187 {
188    const int sel[6] = {0, 0 ,0, 1, 1, 1};
189    const int chan[6] = {0, 1 ,3, 0, 1, 2};
190 
191    increment_reserved_registers();
192    increment_reserved_registers();
193 
194    /* Reserve registers used by the shaders (should check how many
195     * components are actually used */
196    for (int i = 0; i < 6; ++i) {
197       auto reg = new GPRValue(sel[i], chan[i]);
198       reg->set_as_input();
199       m_per_vertex_offsets[i].reset(reg);
200       inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
201    }
202    auto reg = new GPRValue(0, 2);
203    reg->set_as_input();
204    m_primitive_id.reset(reg);
205    inject_register(0, 2, m_primitive_id, false);
206 
207    reg = new GPRValue(1, 3);
208    reg->set_as_input();
209    m_invocation_id.reset(reg);
210    inject_register(1, 3, m_invocation_id, false);
211 
212    m_export_base[0] = get_temp_register(0);
213    m_export_base[1] = get_temp_register(0);
214    m_export_base[2] = get_temp_register(0);
215    m_export_base[3] = get_temp_register(0);
216    emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr}));
217    emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr}));
218    emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr}));
219    emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr}));
220 
221    sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
222 
223    if (m_key.gs.tri_strip_adj_fix)
224       emit_adj_fix();
225 
226    return true;
227 }
228 
emit_adj_fix()229 void GeometryShaderFromNir::emit_adj_fix()
230 {
231    PValue adjhelp0(new  GPRValue(m_export_base[0]->sel(), 1));
232    emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
233 
234    int reg_indices[6];
235    int reg_chanels[6] = {1, 2, 3, 1, 2, 3};
236 
237    int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
238 
239    reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
240    reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
241 
242    std::array<PValue, 6> adjhelp;
243 
244    AluInstruction *ir = nullptr;
245    for (int i = 0; i < 6; i++) {
246       adjhelp[i].reset(new GPRValue(reg_indices[i], reg_chanels[i]));
247       ir = new AluInstruction(op3_cnde_int, adjhelp[i],
248                              {adjhelp0, m_per_vertex_offsets[i],
249                               m_per_vertex_offsets[rotate_indices[i]]},
250                              {alu_write});
251       if ((get_chip_class() == CAYMAN && i == 2) || (i  == 3))
252          ir->set_flag(alu_last_instr);
253       emit_instruction(ir);
254    }
255    ir->set_flag(alu_last_instr);
256 
257    for (int i = 0; i < 6; i++)
258       m_per_vertex_offsets[i] = adjhelp[i];
259 }
260 
261 
emit_intrinsic_instruction_override(nir_intrinsic_instr * instr)262 bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
263 {
264    switch (instr->intrinsic) {
265    case nir_intrinsic_emit_vertex:
266       return emit_vertex(instr, false);
267    case nir_intrinsic_end_primitive:
268       return emit_vertex(instr, true);
269    case nir_intrinsic_load_primitive_id:
270       return load_preloaded_value(instr->dest, 0, m_primitive_id);
271    case nir_intrinsic_load_invocation_id:
272       return load_preloaded_value(instr->dest, 0, m_invocation_id);
273    case nir_intrinsic_store_output:
274       return emit_store(instr);
275    case nir_intrinsic_load_per_vertex_input:
276       return emit_load_per_vertex_input(instr);
277    default:
278       ;
279    }
280    return false;
281 }
282 
emit_vertex(nir_intrinsic_instr * instr,bool cut)283 bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
284 {
285    int stream = nir_intrinsic_stream_id(instr);
286    assert(stream < 4);
287 
288    for(auto v: streamout_data) {
289       if (stream == 0 || v.first != VARYING_SLOT_POS) {
290          v.second->patch_ring(stream, m_export_base[stream]);
291          emit_instruction(v.second);
292       } else
293          delete v.second;
294    }
295    streamout_data.clear();
296    emit_instruction(new EmitVertex(stream, cut));
297 
298    if (!cut)
299       emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream],
300                                           PValue(new LiteralValue(sh_info().noutput)),
301                                           {alu_write, alu_last_instr}));
302 
303    return true;
304 }
305 
emit_load_per_vertex_input(nir_intrinsic_instr * instr)306 bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
307 {
308    auto dest = vec_from_nir(instr->dest, 4);
309 
310    std::array<int, 4> swz = {7,7,7,7};
311    for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
312       swz[i] = i + nir_intrinsic_component(instr);
313    }
314 
315    auto literal_index = nir_src_as_const_value(instr->src[0]);
316 
317    if (!literal_index) {
318       sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
319       return false;
320    }
321    assert(literal_index->u32 < 6);
322    assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
323 
324    PValue addr = m_per_vertex_offsets[literal_index->u32];
325    auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
326                                      16 * nir_intrinsic_base(instr),
327                                      R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
328    fetch->set_dest_swizzle(swz);
329 
330    emit_instruction(fetch);
331    return true;
332 }
333 
do_finalize()334 void GeometryShaderFromNir::do_finalize()
335 {
336    if (m_clip_dist_mask) {
337       int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask);
338       sh_info().cc_dist_mask = (1 << num_clip_dist) - 1;
339       sh_info().clip_dist_write = (1 << num_clip_dist) - 1;
340    }
341 }
342 
343 }
344