1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "helpers.h"
26 
27 using namespace aco;
28 
29 BEGIN_TEST(optimizer_postRA.vcmp)
30     PhysReg reg_v0(256);
31     PhysReg reg_s0(0);
32     PhysReg reg_s2(2);
33     PhysReg reg_s4(4);
34 
35     //>> v1: %a:v[0] = p_startpgm
36     ASSERTED bool setup_ok = setup_cs("v1", GFX8);
37     assert(setup_ok);
38 
39     auto &startpgm = bld.instructions->at(0);
40     assert(startpgm->opcode == aco_opcode::p_startpgm);
41     startpgm->definitions[0].setFixed(reg_v0);
42 
43     Temp v_in = inputs[0];
44 
45     {
46         /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
47 
48         //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
49         //! s2: %e:s[2-3] = p_cbranch_z %b:vcc
50         //! p_unit_test 0, %e:s[2-3]
51         auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
52                              Operand(v_in, reg_v0));
53         auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
54         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
55         writeout(0, Operand(br, reg_s2));
56     }
57 
58     //; del b, e
59 
60     {
61         /* When VCC is overwritten inbetween, don't optimize. */
62 
63         //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
64         //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
65         //! s2: %f:vcc = s_mov_b64 0
66         //! s2: %e:s[2-3] = p_cbranch_z %d:scc
67         //! p_unit_test 1, %e:s[2-3], %f:vcc
68         auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
69                              Operand(v_in, reg_v0));
70         auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
71         auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
72         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
73         writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
74     }
75 
76     //; del b, c, d, e, f
77 
78     {
79         /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
80 
81         //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
82         //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
83         //! s2: %e:s[2-3] = p_cbranch_z %d:scc
84         //! p_unit_test 2, %e:s[2-3]
85         auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
86                                  Operand(v_in, reg_v0));
87         auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
88         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
89         writeout(2, Operand(br, reg_s2));
90     }
91 
92     //; del b, c, d, e
93 
94     {
95         /* When the VCC isn't written by VOPC, don't optimize */
96 
97         //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
98         //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
99         //! s2: %e:s[2-3] = p_cbranch_z %d:scc
100         //! p_unit_test 2, %e:s[2-3]
101         auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
102                              Operand::c32(1u), Operand(reg_s4, bld.lm));
103         auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
104         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
105         writeout(2, Operand(br, reg_s2));
106     }
107 
108     //; del b, c, d, e, f, x
109 
110     {
111         /* When EXEC is overwritten inbetween, don't optimize. */
112 
113         //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
114         //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
115         //! s2: %f:exec = s_mov_b64 42
116         //! s2: %e:s[2-3] = p_cbranch_z %d:scc
117         //! p_unit_test 4, %e:s[2-3], %f:exec
118         auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
119                              Operand(v_in, reg_v0));
120         auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
121         auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
122         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
123         writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
124     }
125 
126     //; del b, c, d, e, f, x
127 
128     finish_optimizer_postRA_test();
129 END_TEST
130 
131 BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
132     //>> s1: %a, s2: %y, s1: %z = p_startpgm
133     ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
134     assert(setup_ok);
135 
136     PhysReg reg_s0{0};
137     PhysReg reg_s1{1};
138     PhysReg reg_s2{2};
139     PhysReg reg_s3{3};
140     PhysReg reg_s4{4};
141     PhysReg reg_s6{6};
142 
143     Temp in_0 = inputs[0];
144     Temp in_1 = inputs[1];
145     Temp in_2 = inputs[2];
146     Operand op_in_0(in_0);
147     op_in_0.setFixed(reg_s0);
148     Operand op_in_1(in_1);
149     op_in_1.setFixed(reg_s4);
150     Operand op_in_2(in_2);
151     op_in_2.setFixed(reg_s6);
152 
153     {
154         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
155         //! s2: %f:vcc = p_cbranch_nz %e:scc
156         //! p_unit_test 0, %f:vcc
157         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
158                              Operand::c32(0x40018u));
159         auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
160                              Operand::zero());
161         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
162         writeout(0, Operand(br, vcc));
163     }
164 
165     //; del d, e, f
166 
167     {
168         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
169         //! s2: %f:vcc = p_cbranch_z %e:scc
170         //! p_unit_test 1, %f:vcc
171         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
172                              Operand::c32(0x40018u));
173         auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
174                              Operand::zero());
175         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
176         writeout(1, Operand(br, vcc));
177     }
178 
179     //; del d, e, f
180 
181     {
182         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
183         //! s2: %f:vcc = p_cbranch_z %e:scc
184         //! p_unit_test 2, %f:vcc
185         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
186                              Operand::c32(0x40018u));
187         auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
188                              Operand::zero());
189         auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
190         writeout(2, Operand(br, vcc));
191     }
192 
193     //; del d, e, f
194 
195     {
196         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
197         //! s2: %f:vcc = p_cbranch_nz %e:scc
198         //! p_unit_test 3, %f:vcc
199         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
200                              Operand::c32(0x40018u));
201         auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
202                              Operand::zero());
203         auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
204         writeout(3, Operand(br, vcc));
205     }
206 
207     //; del d, e, f
208 
209     {
210         //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
211         //! s2: %f:vcc = p_cbranch_z %e:scc
212         //! p_unit_test 4, %f:vcc
213         auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
214                              Operand::c32(0x12345u));
215         auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
216                              Operand::zero(8));
217         auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
218         writeout(4, Operand(br, vcc));
219     }
220 
221     //; del d, e, f
222 
223     {
224         /* SCC is overwritten in between, don't optimize */
225 
226         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
227         //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
228         //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
229         //! s2: %f:vcc = p_cbranch_z %g:scc
230         //! p_unit_test 5, %f:vcc, %h:s[3]
231         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
232                              Operand::c32(0x40018u));
233         auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
234                              Operand::c32(1u));
235         auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
236                              Operand::zero());
237         auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
238         writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
239     }
240 
241     //; del d, e, f, g, h, x
242 
243     {
244         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
245         //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
246         //! p_unit_test 6, %f:s[4]
247         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
248                              Operand::c32(0x40018u));
249         auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
250                              Operand::zero());
251         auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
252         writeout(6, Operand(br, reg_s4));
253     }
254 
255     //; del d, e, f
256 
257     {
258         /* SCC is overwritten in between, don't optimize */
259 
260         //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
261         //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
262         //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
263         //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
264         //! p_unit_test 7, %f:s[4], %h:s[3]
265         auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
266                              Operand::c32(0x40018u));
267         auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
268                              Operand::c32(1u));
269         auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
270                              Operand::zero());
271         auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
272         writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
273     }
274 
275     //; del d, e, f, g, h, x
276 
277     finish_optimizer_postRA_test();
278 END_TEST
279 
280 BEGIN_TEST(optimizer_postRA.dpp)
281    //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm
282    if (!setup_cs("v1 v1 s2 s2", GFX10_3))
283       return;
284 
285    bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
286    bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
287    bld.instructions->at(0)->definitions[2].setFixed(vcc);
288    bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0));
289 
290    PhysReg reg_v0(256);
291    PhysReg reg_v2(258);
292    Operand a(inputs[0], PhysReg(256));
293    Operand b(inputs[1], PhysReg(257));
294    Operand c(inputs[2], vcc);
295    Operand d(inputs[3], PhysReg(0));
296 
297    /* basic optimization */
298    //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
299    //! p_unit_test 0, %res0:v[2]
300    Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
301    Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
302    writeout(0, Operand(res0, reg_v2));
303 
304    /* operand swapping */
305    //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
306    //! p_unit_test 1, %res1:v[2]
307    Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
308    Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2));
309    writeout(1, Operand(res1, reg_v2));
310 
311    //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
312    //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
313    //! p_unit_test 2, %res2:v[2]
314    Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
315    Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
316    writeout(2, Operand(res2, reg_v2));
317 
318    /* modifiers */
319    //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
320    //! p_unit_test 3, %res3:v[2]
321    auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
322    tmp3.instr->dpp().neg[0] = true;
323    Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
324    writeout(3, Operand(res3, reg_v2));
325 
326    //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
327    //! p_unit_test 4, %res4:v[2]
328    Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
329    auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
330    res4.instr->vop3().neg[0] = true;
331    writeout(4, Operand(res4, reg_v2));
332 
333    //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
334    //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp
335    //! p_unit_test 5, %res5:v[2]
336    Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
337    auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
338    res5.instr->vop3().clamp = true;
339    writeout(5, Operand(res5, reg_v2));
340 
341    //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
342    //! p_unit_test 6, %res6:v[2]
343    auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
344    tmp6.instr->dpp().neg[0] = true;
345    auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
346    res6.instr->vop3().abs[0] = true;
347    writeout(6, Operand(res6, reg_v2));
348 
349    //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
350    //! p_unit_test 7, %res7:v[2]
351    Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
352    auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
353    res7.instr->vop3().abs[0] = true;
354    writeout(7, Operand(res7, reg_v2));
355 
356    /* vcc */
357    //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
358    //! p_unit_test 8, %res8:v[2]
359    Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
360    Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
361    writeout(8, Operand(res8, reg_v2));
362 
363    //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
364    //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
365    //! p_unit_test 9, %res9:v[2]
366    Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
367    Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
368    writeout(9, Operand(res9, reg_v2));
369 
370    /* control flow */
371    //! BB1
372    //! /* logical preds: / linear preds: BB0, / kind: uniform, */
373    //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
374    //! p_unit_test 10, %res10:v[2]
375    Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
376 
377    bld.reset(program->create_and_insert_block());
378    program->blocks[0].linear_succs.push_back(1);
379    program->blocks[1].linear_preds.push_back(0);
380 
381    Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
382    writeout(10, Operand(res10, reg_v2));
383 
384    /* can't combine if the v_mov_b32's operand is modified */
385    //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
386    //! v1: %tmp11_2:v[0] = v_mov_b32 0
387    //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1]
388    //! p_unit_test 11, %res11_1:v[2], %tmp11_2:v[0]
389    Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
390    Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0));
391    Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b);
392    writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0));
393 
394    finish_optimizer_postRA_test();
395 END_TEST
396 
397