1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 
28 #include "sfn_emitaluinstruction.h"
29 #include "sfn_debug.h"
30 
31 #include "gallium/drivers/r600/r600_shader.h"
32 
33 namespace r600 {
34 
35 using std::vector;
36 
EmitAluInstruction(ShaderFromNirProcessor & processor)37 EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
38    EmitInstruction (processor)
39 {
40 
41 }
42 
do_emit(nir_instr * ir)43 bool EmitAluInstruction::do_emit(nir_instr* ir)
44 {
45    const nir_alu_instr& instr = *nir_instr_as_alu(ir);
46 
47    r600::sfn_log << SfnLog::instr << "emit '"
48                  << *ir
49                  << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
50                  << "' (" << __func__ << ")\n";
51 
52    preload_src(instr);
53 
54    if (get_chip_class() == CAYMAN) {
55       switch (instr.op) {
56       case nir_op_fcos_r600: return emit_alu_cm_trig(instr, op1_cos);
57       case nir_op_fexp2: return emit_alu_cm_trig(instr, op1_exp_ieee);
58       case nir_op_flog2: return emit_alu_cm_trig(instr, op1_log_clamped);
59       case nir_op_frcp: return emit_alu_cm_trig(instr, op1_recip_ieee);
60       case nir_op_frsq: return emit_alu_cm_trig(instr, op1_recipsqrt_ieee1);
61       case nir_op_fsin_r600: return emit_alu_cm_trig(instr, op1_sin);
62       case nir_op_fsqrt: return emit_alu_cm_trig(instr, op1_sqrt_ieee);
63          default:
64             ;
65       }
66    }
67 
68    switch (instr.op) {
69     /* These are in the ALU instruction list, but they should be texture instructions */
70    case nir_op_b2b1: return emit_mov(instr);
71    case nir_op_b2b32: return emit_mov(instr);
72    case nir_op_b2f32: return emit_alu_b2f(instr);
73    case nir_op_b2i32: return emit_b2i32(instr);
74    case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
75    case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
76    case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
77    case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
78    case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
79    case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
80    case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
81    case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
82    case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
83    case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
84    case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
85    case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
86    case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
87    case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
88    case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
89    case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
90    case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
91    case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
92    case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
93    case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
94    case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
95    case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
96    case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
97    case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
98    case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
99    case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int,  {0, 2, 1});
100    case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
101    case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
102 
103    case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
104    case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
105    case nir_op_cube_r600: return emit_cube(instr);
106    case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
107    case nir_op_f2b32: return emit_alu_f2b32(instr);
108    case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
109    case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
110    case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
111    case nir_op_fadd: return emit_alu_op2(instr, op2_add);
112    case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
113    case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
114    case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
115    case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2});
116    case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2});
117 
118     /* These are in the ALU instruction list, but they should be texture instructions */
119    case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
120    case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
121    case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
122    case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false);
123    case nir_op_fddy_coarse:
124    case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v,  true);
125    case nir_op_fdot2: return emit_dot(instr, 2);
126    case nir_op_fdot3: return emit_dot(instr, 3);
127    case nir_op_fdot4: return emit_dot(instr, 4);
128    case nir_op_fdph:  return emit_fdph(instr);
129    case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10);
130    case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
131    case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
132    case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
133    case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
134    case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
135    case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10);
136    case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
137    case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
138    case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
139    case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
140    case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
141    case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
142    case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
143    case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
144    case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
145    case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10);
146    case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
147    case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
148    case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
149    case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
150    case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
151    case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
152    case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
153    case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
154    case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
155    case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
156    case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
157    case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
158    case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
159    case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
160    case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int);
161    case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int,  {0, 1, 2});
162    case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int,  {0, 1, 2});
163    case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int);
164    case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
165    case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int);
166    case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int);
167    case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
168    case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
169    case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
170    case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
171    case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
172    case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
173    case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
174    case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int);
175    case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
176    case nir_op_ineg: return emit_alu_ineg(instr);
177    case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
178    case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
179    case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
180    case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
181    case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
182    case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
183    case nir_op_mov:return emit_mov(instr);
184    case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
185    case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
186    case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse);
187    case nir_op_sge: return emit_alu_op2(instr, op2_setge);
188    case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
189    case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint);
190    case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint);
191    case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint);
192    case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
193    case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
194    case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
195    case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24,  {0, 1, 2});
196    case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
197    case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
198    case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
199    case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
200    case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
201    case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
202    case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
203    case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
204    case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
205    case nir_op_vec2: return emit_create_vec(instr, 2);
206    case nir_op_vec3: return emit_create_vec(instr, 3);
207    case nir_op_vec4: return emit_create_vec(instr, 4);
208    default:
209       return false;
210    }
211 }
212 
preload_src(const nir_alu_instr & instr)213 void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
214 {
215    const nir_op_info *op_info = &nir_op_infos[instr.op];
216    assert(op_info->num_inputs <= 4);
217 
218    unsigned nsrc_comp = num_src_comp(instr);
219    sfn_log << SfnLog::reg << "Preload:\n";
220    for (unsigned i = 0; i < op_info->num_inputs; ++i) {
221       for (unsigned c = 0; c < nsrc_comp; ++c) {
222          m_src[i][c] = from_nir(instr.src[i], c);
223          sfn_log << SfnLog::reg << " " << *m_src[i][c];
224 
225       }
226       sfn_log << SfnLog::reg << "\n";
227    }
228    if (instr.op == nir_op_fdph) {
229       m_src[1][3] = from_nir(instr.src[1], 3);
230       sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
231    }
232 
233    split_constants(instr, nsrc_comp);
234 }
235 
num_src_comp(const nir_alu_instr & instr)236 unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
237 {
238    switch (instr.op) {
239    case nir_op_fdot2:
240    case nir_op_bany_inequal2:
241    case nir_op_ball_iequal2:
242    case nir_op_bany_fnequal2:
243    case nir_op_ball_fequal2:
244    case nir_op_b32any_inequal2:
245    case nir_op_b32all_iequal2:
246    case nir_op_b32any_fnequal2:
247    case nir_op_b32all_fequal2:
248    case nir_op_unpack_64_2x32_split_y:
249       return 2;
250 
251    case nir_op_fdot3:
252    case nir_op_bany_inequal3:
253    case nir_op_ball_iequal3:
254    case nir_op_bany_fnequal3:
255    case nir_op_ball_fequal3:
256    case nir_op_b32any_inequal3:
257    case nir_op_b32all_iequal3:
258    case nir_op_b32any_fnequal3:
259    case nir_op_b32all_fequal3:
260    case nir_op_cube_r600:
261       return 3;
262 
263    case nir_op_fdot4:
264    case nir_op_fdph:
265    case nir_op_bany_inequal4:
266    case nir_op_ball_iequal4:
267    case nir_op_bany_fnequal4:
268    case nir_op_ball_fequal4:
269    case nir_op_b32any_inequal4:
270    case nir_op_b32all_iequal4:
271    case nir_op_b32any_fnequal4:
272    case nir_op_b32all_fequal4:
273       return 4;
274 
275    case nir_op_vec2:
276    case nir_op_vec3:
277    case nir_op_vec4:
278       return 1;
279 
280    default:
281       return nir_dest_num_components(instr.dest.dest);
282 
283    }
284 }
285 
emit_cube(const nir_alu_instr & instr)286 bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr)
287 {
288    AluInstruction *ir = nullptr;
289    const uint16_t src0_chan[4] = {2, 2, 0, 1};
290    const uint16_t src1_chan[4] = {1, 0, 2, 2};
291 
292    for (int i = 0; i < 4; ++i)  {
293       ir = new AluInstruction(op2_cube, from_nir(instr.dest, i),
294                               from_nir(instr.src[0], src0_chan[i]),
295                               from_nir(instr.src[0], src1_chan[i]), {alu_write});
296       emit_instruction(ir);
297    }
298    ir->set_flag(alu_last_instr);
299    return true;
300 }
301 
split_constants(const nir_alu_instr & instr,unsigned nsrc_comp)302 void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
303 {
304     const nir_op_info *op_info = &nir_op_infos[instr.op];
305     if (op_info->num_inputs < 2)
306        return;
307 
308     int nconst = 0;
309     std::array<const UniformValue *,4> c;
310     std::array<int,4> idx;
311     for (unsigned i = 0; i < op_info->num_inputs; ++i) {
312        PValue& src = m_src[i][0];
313        assert(src);
314        sfn_log << SfnLog::reg << "Split test " << *src;
315 
316        if (src->type() == Value::kconst) {
317           c[nconst] = static_cast<const UniformValue *>(src.get());
318           idx[nconst++] = i;
319           sfn_log << SfnLog::reg << " is constant " << i;
320        }
321        sfn_log << SfnLog::reg << "\n";
322     }
323 
324     if (nconst < 2)
325        return;
326 
327     unsigned sel = c[0]->sel();
328     unsigned kcache =  c[0]->kcache_bank();
329     sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
330 
331     for (int i = 1; i < nconst; ++i) {
332        sfn_log << "sel[" << i << "] = " <<  c[i]->sel() << "\n";
333        if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
334           AluInstruction *ir = nullptr;
335           auto v = get_temp_vec4();
336           for (unsigned k = 0; k < nsrc_comp; ++k) {
337              ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
338              emit_instruction(ir);
339              m_src[idx[i]][k] = v[k];
340           }
341           make_last(ir);
342        }
343     }
344 }
345 
emit_alu_inot(const nir_alu_instr & instr)346 bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
347 {
348    if (instr.src[0].negate || instr.src[0].abs) {
349       std::cerr << "source modifiers not supported with int ops\n";
350       return false;
351    }
352 
353    AluInstruction *ir = nullptr;
354    for (int i = 0; i < 4 ; ++i) {
355       if (instr.dest.write_mask & (1 << i)){
356          ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
357                                  m_src[0][i], write);
358          emit_instruction(ir);
359       }
360    }
361    make_last(ir);
362    return true;
363 }
364 
emit_alu_op1(const nir_alu_instr & instr,EAluOp opcode,const AluOpFlags & flags)365 bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
366                                       const AluOpFlags& flags)
367 {
368    AluInstruction *ir = nullptr;
369    for (int i = 0; i < 4 ; ++i) {
370       if (instr.dest.write_mask & (1 << i)){
371          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
372                                  m_src[0][i], write);
373 
374          if (flags.test(alu_src0_abs) || instr.src[0].abs)
375             ir->set_flag(alu_src0_abs);
376 
377          if (instr.src[0].negate ^ flags.test(alu_src0_neg))
378             ir->set_flag(alu_src0_neg);
379 
380          if (flags.test(alu_dst_clamp) || instr.dest.saturate)
381              ir->set_flag(alu_dst_clamp);
382 
383          emit_instruction(ir);
384       }
385    }
386    make_last(ir);
387 
388    return true;
389 }
390 
emit_mov(const nir_alu_instr & instr)391 bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
392 {
393    /* If the op is a plain move beween SSA values we can just forward
394     * the register reference to the original register */
395    if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
396        !instr.src[0].abs && !instr.src[0].negate  && !instr.dest.saturate) {
397       bool result = true;
398       for (int i = 0; i < 4 ; ++i) {
399          if (instr.dest.write_mask & (1 << i)){
400             result &= inject_register(instr.dest.dest.ssa.index, i,
401                                       m_src[0][i], true);
402          }
403       }
404       return result;
405    } else {
406       return emit_alu_op1(instr, op1_mov);
407    }
408 }
409 
emit_alu_trans_op1(const nir_alu_instr & instr,EAluOp opcode,bool absolute)410 bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
411                                             bool absolute)
412 {
413    AluInstruction *ir = nullptr;
414    std::set<int> src_idx;
415 
416    if (get_chip_class() == CAYMAN) {
417       int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
418       for (int i = 0; i < last_slot; ++i) {
419          bool write_comp = instr.dest.write_mask & (1 << i);
420          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
421                                  m_src[0][write_comp ? i : 0], write_comp ? write : empty);
422          if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
423          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
424          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
425 
426          if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
427 
428          emit_instruction(ir);
429       }
430    } else {
431       for (int i = 0; i < 4 ; ++i) {
432          if (instr.dest.write_mask & (1 << i)){
433             ir = new AluInstruction(opcode, from_nir(instr.dest, i),
434                                     m_src[0][i], last_write);
435             if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
436             if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
437             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
438             emit_instruction(ir);
439          }
440       }
441    }
442    return true;
443 }
444 
emit_alu_cm_trig(const nir_alu_instr & instr,EAluOp opcode)445 bool EmitAluInstruction::emit_alu_cm_trig(const nir_alu_instr& instr, EAluOp opcode)
446 {
447    AluInstruction *ir = nullptr;
448    std::set<int> src_idx;
449 
450    unsigned last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
451 
452    for (unsigned j = 0; j < nir_dest_num_components(instr.dest.dest); ++j) {
453       for (unsigned i = 0; i < last_slot; ++i) {
454          bool write_comp = instr.dest.write_mask & (1 << j) && (i == j);
455          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
456                                  m_src[0][j], write_comp ? write : empty);
457          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
458          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
459          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
460 
461          if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
462 
463          emit_instruction(ir);
464       }
465    }
466    return true;
467 }
468 
469 
emit_alu_f2i32_or_u32(const nir_alu_instr & instr,EAluOp op)470 bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
471 {
472    AluInstruction *ir = nullptr;
473 
474    if (get_chip_class() < CAYMAN) {
475       std::array<PValue, 4> v;
476 
477       for (int i = 0; i < 4; ++i) {
478          if (!(instr.dest.write_mask & (1 << i)))
479             continue;
480          v[i] = from_nir(instr.dest, i);
481          ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
482          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
483          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
484          emit_instruction(ir);
485       }
486       make_last(ir);
487 
488       for (int i = 0; i < 4; ++i) {
489          if (!(instr.dest.write_mask & (1 << i)))
490             continue;
491          ir = new AluInstruction(op, v[i], v[i], {alu_write});
492          emit_instruction(ir);
493          if (op == op1_flt_to_uint)
494             make_last(ir);
495       }
496       make_last(ir);
497    } else {
498       for (int i = 0; i < 4; ++i) {
499          if (!(instr.dest.write_mask & (1 << i)))
500             continue;
501          ir = new AluInstruction(op, from_nir(instr.dest, i), m_src[0][i], {alu_write});
502          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
503          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
504          emit_instruction(ir);
505          if (op == op1_flt_to_uint)
506             make_last(ir);
507       }
508       make_last(ir);
509    }
510 
511    return true;
512 }
513 
emit_alu_f2b32(const nir_alu_instr & instr)514 bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
515 {
516    AluInstruction *ir = nullptr;
517    for (int i = 0; i < 4 ; ++i) {
518       if (instr.dest.write_mask & (1 << i)){
519          ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
520                                  m_src[0][i], literal(0.0f), write);
521          emit_instruction(ir);
522       }
523    }
524    make_last(ir);
525    return true;
526 }
527 
emit_b2i32(const nir_alu_instr & instr)528 bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
529 {
530    AluInstruction *ir = nullptr;
531    for (int i = 0; i < 4 ; ++i) {
532       if (!(instr.dest.write_mask & (1 << i)))
533          continue;
534 
535       ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
536                               m_src[0][i], Value::one_i, write);
537      emit_instruction(ir);
538    }
539    make_last(ir);
540 
541    return true;
542 }
543 
emit_pack_64_2x32_split(const nir_alu_instr & instr)544 bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
545 {
546    AluInstruction *ir = nullptr;
547    for (unsigned i = 0; i < 2; ++i) {
548       if (!(instr.dest.write_mask & (1 << i)))
549          continue;
550      ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
551                              m_src[0][i], write);
552      emit_instruction(ir);
553    }
554    ir->set_flag(alu_last_instr);
555    return true;
556 }
557 
emit_unpack_64_2x32_split(const nir_alu_instr & instr,unsigned comp)558 bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
559 {
560    emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
561                                        m_src[0][comp], last_write));
562    return true;
563 }
564 
emit_create_vec(const nir_alu_instr & instr,unsigned nc)565 bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
566 {
567    AluInstruction *ir = nullptr;
568    std::set<int> src_slot;
569    for(unsigned i = 0; i < nc; ++i) {
570       if (instr.dest.write_mask & (1 << i)){
571          auto src = m_src[i][0];
572          ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
573          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
574 
575          // FIXME: This is a rather crude approach to fix the problem that
576          // r600 can't read from four different slots of the same component
577          // here we check only for the register index
578          if (src->type() == Value::gpr)
579             src_slot.insert(src->sel());
580          if (src_slot.size() >= 3) {
581             src_slot.clear();
582             ir->set_flag(alu_last_instr);
583          }
584          emit_instruction(ir);
585       }
586    }
587    if (ir)
588       ir->set_flag(alu_last_instr);
589    return true;
590 }
591 
emit_dot(const nir_alu_instr & instr,int n)592 bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
593 {
594    const nir_alu_src& src0 = instr.src[0];
595    const nir_alu_src& src1 = instr.src[1];
596 
597    AluInstruction *ir = nullptr;
598    for (int i = 0; i < n ; ++i) {
599       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
600                               m_src[0][i], m_src[1][i],
601                               instr.dest.write_mask & (1 << i) ? write : empty);
602 
603       if (src0.negate) ir->set_flag(alu_src0_neg);
604       if (src0.abs) ir->set_flag(alu_src0_abs);
605       if (src1.negate) ir->set_flag(alu_src1_neg);
606       if (src1.abs) ir->set_flag(alu_src1_abs);
607 
608       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
609       emit_instruction(ir);
610    }
611    for (int i = n; i < 4 ; ++i) {
612       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
613                               Value::zero, Value::zero,
614                               instr.dest.write_mask & (1 << i) ? write : empty);
615       emit_instruction(ir);
616    }
617 
618    if (ir)
619       ir->set_flag(alu_last_instr);
620    return true;
621 }
622 
emit_fdph(const nir_alu_instr & instr)623 bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
624 {
625    const nir_alu_src& src0 = instr.src[0];
626    const nir_alu_src& src1 = instr.src[1];
627 
628    AluInstruction *ir = nullptr;
629    for (int i = 0; i < 3 ; ++i) {
630       ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
631                               m_src[0][i], m_src[1][i],
632                               instr.dest.write_mask & (1 << i) ? write : empty);
633       if (src0.negate) ir->set_flag(alu_src0_neg);
634       if (src0.abs) ir->set_flag(alu_src0_abs);
635       if (src1.negate) ir->set_flag(alu_src1_neg);
636       if (src1.abs) ir->set_flag(alu_src1_abs);
637       if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
638       emit_instruction(ir);
639    }
640 
641    ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
642                            m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
643    if (src1.negate) ir->set_flag(alu_src1_neg);
644    if (src1.abs) ir->set_flag(alu_src1_abs);
645    emit_instruction(ir);
646 
647    ir->set_flag(alu_last_instr);
648    return true;
649 
650 }
651 
emit_alu_i2orf2_b1(const nir_alu_instr & instr,EAluOp op)652 bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
653 {
654    AluInstruction *ir = nullptr;
655    for (int i = 0; i < 4 ; ++i) {
656       if (instr.dest.write_mask & (1 << i)) {
657          ir = new AluInstruction(op, from_nir(instr.dest, i),
658                                  m_src[0][i], Value::zero,
659                                  write);
660          emit_instruction(ir);
661       }
662    }
663    if (ir)
664       ir->set_flag(alu_last_instr);
665    return true;
666 }
667 
emit_alu_b2f(const nir_alu_instr & instr)668 bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
669 {
670    AluInstruction *ir = nullptr;
671    for (int i = 0; i < 4 ; ++i) {
672       if (instr.dest.write_mask & (1 << i)){
673          ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
674                                  m_src[0][i], Value::one_f, write);
675          if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
676          if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
677          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
678          emit_instruction(ir);
679       }
680    }
681    if (ir)
682       ir->set_flag(alu_last_instr);
683    return true;
684 }
685 
emit_any_all_icomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)686 bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
687 {
688 
689    AluInstruction *ir = nullptr;
690    PValue v[4]; // this might need some additional temp register creation
691    for (unsigned i = 0; i < 4 ; ++i)
692       v[i] = from_nir(instr.dest, i);
693 
694    EAluOp combine = all ? op2_and_int : op2_or_int;
695 
696    /* For integers we can not use the modifiers, so this needs some emulation */
697    /* Should actually be lowered with NIR */
698    if (instr.src[0].negate == instr.src[1].negate &&
699        instr.src[0].abs == instr.src[1].abs) {
700 
701       for (unsigned i = 0; i < nc ; ++i) {
702          ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
703          emit_instruction(ir);
704       }
705       if (ir)
706          ir->set_flag(alu_last_instr);
707    } else {
708       std::cerr << "Negate in iequal/inequal not (yet) supported\n";
709       return false;
710    }
711 
712    for (unsigned i = 0; i < nc/2 ; ++i) {
713       ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
714       emit_instruction(ir);
715    }
716    if (ir)
717       ir->set_flag(alu_last_instr);
718 
719    if (nc > 2) {
720       ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
721       emit_instruction(ir);
722    }
723 
724    return true;
725 }
726 
emit_any_all_fcomp(const nir_alu_instr & instr,EAluOp op,unsigned nc,bool all)727 bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
728 {
729    AluInstruction *ir = nullptr;
730    PValue v[4]; // this might need some additional temp register creation
731    for (unsigned i = 0; i < 4 ; ++i)
732       v[i] = from_nir(instr.dest, i);
733 
734    for (unsigned i = 0; i < nc ; ++i) {
735       ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
736 
737       if (instr.src[0].abs)
738          ir->set_flag(alu_src0_abs);
739       if (instr.src[0].negate)
740          ir->set_flag(alu_src0_neg);
741 
742       if (instr.src[1].abs)
743          ir->set_flag(alu_src1_abs);
744       if (instr.src[1].negate)
745          ir->set_flag(alu_src1_neg);
746 
747       emit_instruction(ir);
748    }
749    if (ir)
750       ir->set_flag(alu_last_instr);
751 
752    for (unsigned i = 0; i < nc ; ++i) {
753       ir = new AluInstruction(op1_max4, v[i], v[i], write);
754       if (all) ir->set_flag(alu_src0_neg);
755       emit_instruction(ir);
756    }
757 
758    for (unsigned i = nc; i < 4 ; ++i) {
759       ir = new AluInstruction(op1_max4, v[i],
760                               all ? Value::one_f : Value::zero, write);
761       if (all)
762          ir->set_flag(alu_src0_neg);
763 
764       emit_instruction(ir);
765    }
766 
767    ir->set_flag(alu_last_instr);
768 
769    if (all)
770       op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
771    else
772       op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
773 
774    ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
775    if (all)
776       ir->set_flag(alu_src1_neg);
777    emit_instruction(ir);
778 
779    return true;
780 }
781 
emit_any_all_fcomp2(const nir_alu_instr & instr,EAluOp op,bool all)782 bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
783 {
784    AluInstruction *ir = nullptr;
785    PValue v[4]; // this might need some additional temp register creation
786    for (unsigned i = 0; i < 4 ; ++i)
787       v[i] = from_nir(instr.dest, i);
788 
789    for (unsigned i = 0; i < 2 ; ++i) {
790       ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
791       if (instr.src[0].abs)
792          ir->set_flag(alu_src0_abs);
793       if (instr.src[0].negate)
794          ir->set_flag(alu_src0_neg);
795 
796       if (instr.src[1].abs)
797          ir->set_flag(alu_src1_abs);
798       if (instr.src[1].negate)
799          ir->set_flag(alu_src1_neg);
800 
801       emit_instruction(ir);
802    }
803    if (ir)
804       ir->set_flag(alu_last_instr);
805 
806    op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
807    ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
808    emit_instruction(ir);
809 
810    return true;
811 }
812 
emit_alu_trans_op2(const nir_alu_instr & instr,EAluOp opcode)813 bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
814 {
815    const nir_alu_src& src0 = instr.src[0];
816    const nir_alu_src& src1 = instr.src[1];
817 
818    AluInstruction *ir = nullptr;
819 
820    if (get_chip_class() == CAYMAN) {
821       for (int k = 0; k < 4; ++k) {
822          if (instr.dest.write_mask & (1 << k)) {
823 
824             for (int i = 0; i < 4; i++) {
825                ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[1][k], (i == k) ? write : empty);
826                if (src0.negate) ir->set_flag(alu_src0_neg);
827                if (src0.abs) ir->set_flag(alu_src0_abs);
828                if (src1.negate) ir->set_flag(alu_src1_neg);
829                if (src1.abs) ir->set_flag(alu_src1_abs);
830                if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
831                if (i == 3) ir->set_flag(alu_last_instr);
832                emit_instruction(ir);
833             }
834          }
835       }
836    } else {
837       for (int i = 0; i < 4 ; ++i) {
838          if (instr.dest.write_mask & (1 << i)){
839             ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
840             if (src0.negate) ir->set_flag(alu_src0_neg);
841             if (src0.abs) ir->set_flag(alu_src0_abs);
842             if (src1.negate) ir->set_flag(alu_src1_neg);
843             if (src1.abs) ir->set_flag(alu_src1_abs);
844             if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
845             emit_instruction(ir);
846          }
847       }
848    }
849    return true;
850 }
851 
emit_alu_op2_int(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts opts)852 bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
853 {
854 
855    const nir_alu_src& src0 = instr.src[0];
856    const nir_alu_src& src1 = instr.src[1];
857 
858    if (src0.negate || src1.negate ||
859        src0.abs || src1.abs) {
860       std::cerr << "R600: don't support modifiers with integer operations";
861       return false;
862    }
863    return emit_alu_op2(instr, opcode, opts);
864 }
865 
emit_alu_op2(const nir_alu_instr & instr,EAluOp opcode,AluOp2Opts ops)866 bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
867 {
868    const nir_alu_src *src0 = &instr.src[0];
869    const nir_alu_src *src1 = &instr.src[1];
870 
871    int idx0 = 0;
872    int idx1 = 1;
873    if (ops & op2_opt_reverse) {
874       std::swap(src0, src1);
875       std::swap(idx0, idx1);
876    }
877 
878    bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
879 
880    AluInstruction *ir = nullptr;
881    for (int i = 0; i < 4 ; ++i) {
882       if (instr.dest.write_mask & (1 << i)){
883          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
884                                  m_src[idx0][i], m_src[idx1][i], write);
885 
886          if (src0->negate) ir->set_flag(alu_src0_neg);
887          if (src0->abs) ir->set_flag(alu_src0_abs);
888          if (src1_negate) ir->set_flag(alu_src1_neg);
889          if (src1->abs) ir->set_flag(alu_src1_abs);
890          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
891          emit_instruction(ir);
892       }
893    }
894    if (ir)
895       ir->set_flag(alu_last_instr);
896    return true;
897 }
898 
emit_alu_op3(const nir_alu_instr & instr,EAluOp opcode,std::array<uint8_t,3> reorder)899 bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
900                                       std::array<uint8_t, 3> reorder)
901 {
902    const nir_alu_src *src[3];
903    src[0] = &instr.src[reorder[0]];
904    src[1] = &instr.src[reorder[1]];
905    src[2] = &instr.src[reorder[2]];
906 
907    AluInstruction *ir = nullptr;
908    for (int i = 0; i < 4 ; ++i) {
909       if (instr.dest.write_mask & (1 << i)){
910          ir = new AluInstruction(opcode, from_nir(instr.dest, i),
911                                  m_src[reorder[0]][i],
912                                  m_src[reorder[1]][i],
913                                  m_src[reorder[2]][i],
914                write);
915 
916          if (src[0]->negate) ir->set_flag(alu_src0_neg);
917          if (src[1]->negate) ir->set_flag(alu_src1_neg);
918          if (src[2]->negate) ir->set_flag(alu_src2_neg);
919 
920          if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
921          ir->set_flag(alu_write);
922          emit_instruction(ir);
923       }
924    }
925    make_last(ir);
926    return true;
927 }
928 
emit_alu_ineg(const nir_alu_instr & instr)929 bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
930 {
931    AluInstruction *ir = nullptr;
932    for (int i = 0; i < 4 ; ++i) {
933       if (instr.dest.write_mask & (1 << i)){
934          ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
935                                  m_src[0][i], write);
936          emit_instruction(ir);
937       }
938    }
939    if (ir)
940       ir->set_flag(alu_last_instr);
941 
942    return true;
943 }
944 
/* Eight printable characters plus the terminating NUL: the component letters
 * x, y, z, w followed by '0', '1', '?' and '_'.
 * NOTE(review): presumably indexed by a swizzle selector value (0..7) for
 * debug output; no use is visible in this part of the file - confirm. */
static const char swz[] = "xyzw" "01" "?_";
946 
split_alu_modifiers(const nir_alu_src & src,const GPRVector::Values & v,GPRVector::Values & out,int ncomp)947 void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
948                                              const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
949 {
950 
951    AluInstruction *alu = nullptr;
952    for (int i = 0; i < ncomp; ++i) {
953       alu  = new AluInstruction(op1_mov,  out[i], v[i], {alu_write});
954       if (src.abs)
955          alu->set_flag(alu_src0_abs);
956       if (src.negate)
957          alu->set_flag(alu_src0_neg);
958       emit_instruction(alu);
959    }
960    make_last(alu);
961 }
962 
emit_tex_fdd(const nir_alu_instr & instr,TexInstruction::Opcode op,bool fine)963 bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
964                                       bool fine)
965 {
966 
967    GPRVector::Values v;
968    std::array<int, 4> writemask = {0,1,2,3};
969 
970    int ncomp = nir_dest_num_components(instr.dest.dest);
971    GPRVector::Swizzle src_swz = {7,7,7,7};
972    for (auto i = 0; i < ncomp; ++i)
973       src_swz[i] = instr.src[0].swizzle[i];
974 
975    auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
976 
977    if (instr.src[0].abs || instr.src[0].negate) {
978       GPRVector tmp = get_temp_vec4();
979       split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
980       src = tmp;
981    }
982 
983    for (int i = 0; i < 4; ++i) {
984       writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
985       v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
986    }
987 
988    /* This is querying the dreivatives of the output fb, so we would either need
989     * access to the neighboring pixels or to the framebuffer. Neither is currently
990     * implemented */
991    GPRVector dst(v);
992 
993    auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
994    tex->set_dest_swizzle(writemask);
995 
996    if (fine)
997       tex->set_flag(TexInstruction::grad_fine);
998 
999    emit_instruction(tex);
1000 
1001    return true;
1002 }
1003 
emit_unpack_32_2x16_split_y(const nir_alu_instr & instr)1004 bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
1005 {
1006    auto tmp = get_temp_register();
1007    emit_instruction(op2_lshr_int, tmp,
1008    {m_src[0][0], PValue(new LiteralValue(16))},
1009    {alu_write, alu_last_instr});
1010 
1011    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1012                                   {tmp}, {alu_write, alu_last_instr});
1013 
1014    return true;
1015 }
1016 
emit_unpack_32_2x16_split_x(const nir_alu_instr & instr)1017 bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
1018 {
1019    emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
1020    {m_src[0][0]},{alu_write, alu_last_instr});
1021    return true;
1022 }
1023 
emit_pack_32_2x16_split(const nir_alu_instr & instr)1024 bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
1025 {
1026    PValue x = get_temp_register();
1027    PValue y = get_temp_register();
1028 
1029    emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write});
1030    emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr});
1031 
1032    emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr});
1033 
1034    emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr});
1035 
1036    return true;
1037 }
1038 
1039 }
1040