1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4
; Baseline packed 16-bit integer add of two VGPR operands.
; GFX9 has packed math and selects a single v_pk_add_u16; GFX8 lacks it,
; so the add is split into a low-half v_add_u16 plus an SDWA add on the
; high halves, recombined with v_or_b32.
5define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
6; GFX9-LABEL: v_add_v2i16:
7; GFX9:       ; %bb.0:
8; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1
10; GFX9-NEXT:    s_setpc_b64 s[30:31]
11;
12; GFX8-LABEL: v_add_v2i16:
13; GFX8:       ; %bb.0:
14; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
16; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
17; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
18; GFX8-NEXT:    s_setpc_b64 s[30:31]
19  %add = add <2 x i16> %a, %b
20  ret <2 x i16> %add
21}
22
; fneg of the LHS (via a <2 x half> bitcast) feeding an integer add.
; GFX9 folds the sign flip into the packed op's source modifiers
; (neg_lo/neg_hi on src0); GFX8 must materialize it as an explicit
; v_xor with the per-half sign-bit mask 0x80008000 before the split add.
23define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
24; GFX9-LABEL: v_add_v2i16_fneg_lhs:
25; GFX9:       ; %bb.0:
26; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
28; GFX9-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX8-LABEL: v_add_v2i16_fneg_lhs:
31; GFX8:       ; %bb.0:
32; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
34; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
35; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
36; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
37; GFX8-NEXT:    s_setpc_b64 s[30:31]
38  %neg.a = fneg <2 x half> %a
39  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
40  %add = add <2 x i16> %cast.neg.a, %b
41  ret <2 x i16> %add
42}
43
; Mirror of the fneg_lhs case, with the sign flip on the RHS operand.
; GFX9 folds it into src1's modifiers (neg_lo:[0,1] neg_hi:[0,1]);
; GFX8 xors v1 with 0x80008000 first.
44define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
45; GFX9-LABEL: v_add_v2i16_fneg_rhs:
46; GFX9:       ; %bb.0:
47; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
49; GFX9-NEXT:    s_setpc_b64 s[30:31]
50;
51; GFX8-LABEL: v_add_v2i16_fneg_rhs:
52; GFX8:       ; %bb.0:
53; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
55; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
56; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
57; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
58; GFX8-NEXT:    s_setpc_b64 s[30:31]
59  %neg.b = fneg <2 x half> %b
60  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
61  %add = add <2 x i16> %a, %cast.neg.b
62  ret <2 x i16> %add
63}
64
; Both operands negated: GFX9 folds both into one packed add with
; neg modifiers set on both sources. GFX8 needs two xors, so the
; 0x80008000 mask is hoisted into an SGPR (s4) and reused for both.
65define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
66; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
67; GFX9:       ; %bb.0:
68; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
70; GFX9-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX8-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
73; GFX8:       ; %bb.0:
74; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX8-NEXT:    s_mov_b32 s4, 0x80008000
76; GFX8-NEXT:    v_xor_b32_e32 v0, s4, v0
77; GFX8-NEXT:    v_xor_b32_e32 v1, s4, v1
78; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1
79; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
80; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
81; GFX8-NEXT:    s_setpc_b64 s[30:31]
82  %neg.a = fneg <2 x half> %a
83  %neg.b = fneg <2 x half> %b
84  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
85  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
86  %add = add <2 x i16> %cast.neg.a, %cast.neg.b
87  ret <2 x i16> %add
88}
89
; Splat of the negative constant -64 (0xffc0 per half). GFX9 currently
; builds the packed constant with s_mov + s_pack_ll rather than using an
; inline operand; GFX8 keeps 0xffc0 in s4 and also copies it into a VGPR
; (v2) because the SDWA add cannot take the SGPR for that source.
90define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
91; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
92; GFX9:       ; %bb.0:
93; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; GFX9-NEXT:    s_mov_b32 s4, 0xffffffc0
95; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
96; GFX9-NEXT:    v_pk_add_u16 v0, v0, s4
97; GFX9-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
100; GFX8:       ; %bb.0:
101; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX8-NEXT:    s_mov_b32 s4, 0xffc0
103; GFX8-NEXT:    v_mov_b32_e32 v2, s4
104; GFX8-NEXT:    v_add_u16_e32 v1, s4, v0
105; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
106; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
107; GFX8-NEXT:    s_setpc_b64 s[30:31]
108  %add = add <2 x i16> %a, <i16 -64, i16 -64>
109  ret <2 x i16> %add
110}
111
; Mixed constant <-64, 4>: negative value in the low element only.
; GFX9 packs the two halves with s_pack_ll_b32_b16; GFX8 adds 0xffc0 to
; the low half directly and 4 (via v2) to the high half through SDWA.
112define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
113; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
114; GFX9:       ; %bb.0:
115; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX9-NEXT:    s_pack_ll_b32_b16 s4, 0xffffffc0, 4
117; GFX9-NEXT:    v_pk_add_u16 v0, v0, s4
118; GFX9-NEXT:    s_setpc_b64 s[30:31]
119;
120; GFX8-LABEL: v_add_v2i16_neg_inline_imm_lo:
121; GFX8:       ; %bb.0:
122; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GFX8-NEXT:    v_mov_b32_e32 v2, 4
124; GFX8-NEXT:    v_add_u16_e32 v1, 0xffc0, v0
125; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
126; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
127; GFX8-NEXT:    s_setpc_b64 s[30:31]
128  %add = add <2 x i16> %a, <i16 -64, i16 4>
129  ret <2 x i16> %add
130}
131
; Mixed constant <4, -64>: negative value in the high element only —
; the mirror of the _lo case. GFX8 adds 4 to the low half inline and
; 0xffc0 (materialized in v1) to the high half via SDWA.
132define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
133; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
134; GFX9:       ; %bb.0:
135; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX9-NEXT:    s_pack_ll_b32_b16 s4, 4, 0xffffffc0
137; GFX9-NEXT:    v_pk_add_u16 v0, v0, s4
138; GFX9-NEXT:    s_setpc_b64 s[30:31]
139;
140; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
141; GFX8:       ; %bb.0:
142; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffc0
144; GFX8-NEXT:    v_add_u16_e32 v2, 4, v0
145; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
146; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
147; GFX8-NEXT:    s_setpc_b64 s[30:31]
148  %add = add <2 x i16> %a, <i16 4, i16 -64>
149  ret <2 x i16> %add
150}
151
; Scalar (SGPR / uniform) variant of the -64 splat add. With inreg args
; both targets lower through the scalar ALU: the vector is split with
; s_lshr_b32, each half added with s_add_i32, then repacked (GFX9 via
; s_pack_ll_b32_b16, GFX8 via shift/and/or with an 0xffff mask).
152define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
153; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
154; GFX9:       ; %bb.0:
155; GFX9-NEXT:    s_mov_b32 s1, 0xffffffc0
156; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s1
157; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
158; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
159; GFX9-NEXT:    s_add_i32 s0, s0, s1
160; GFX9-NEXT:    s_add_i32 s2, s2, s3
161; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
162; GFX9-NEXT:    ; return to shader part epilog
163;
164; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
165; GFX8:       ; %bb.0:
166; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
167; GFX8-NEXT:    s_mov_b32 s3, 0xffff
168; GFX8-NEXT:    s_mov_b32 s1, 0xffc0
169; GFX8-NEXT:    s_and_b32 s0, s0, s3
170; GFX8-NEXT:    s_and_b32 s2, s2, s3
171; GFX8-NEXT:    s_add_i32 s0, s0, s1
172; GFX8-NEXT:    s_add_i32 s2, s2, s1
173; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
174; GFX8-NEXT:    s_and_b32 s0, s0, s3
175; GFX8-NEXT:    s_or_b32 s0, s1, s0
176; GFX8-NEXT:    ; return to shader part epilog
177  %add = add <2 x i16> %a, <i16 -64, i16 -64>
178  %cast = bitcast <2 x i16> %add to i32
179  ret i32 %cast
180}
181
; Scalar add of mixed constant <-64, 4>. GFX8 folds the per-half
; immediates (0xffc0 and 4) directly into the two s_add_i32 ops instead
; of materializing a packed constant first.
182define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
183; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo:
184; GFX9:       ; %bb.0:
185; GFX9-NEXT:    s_pack_ll_b32_b16 s1, 0xffffffc0, 4
186; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
187; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
188; GFX9-NEXT:    s_add_i32 s0, s0, s1
189; GFX9-NEXT:    s_add_i32 s2, s2, s3
190; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
191; GFX9-NEXT:    ; return to shader part epilog
192;
193; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
194; GFX8:       ; %bb.0:
195; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
196; GFX8-NEXT:    s_mov_b32 s2, 0xffff
197; GFX8-NEXT:    s_and_b32 s0, s0, s2
198; GFX8-NEXT:    s_and_b32 s1, s1, s2
199; GFX8-NEXT:    s_add_i32 s0, s0, 0xffc0
200; GFX8-NEXT:    s_add_i32 s1, s1, 4
201; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
202; GFX8-NEXT:    s_and_b32 s0, s0, s2
203; GFX8-NEXT:    s_or_b32 s0, s1, s0
204; GFX8-NEXT:    ; return to shader part epilog
205  %add = add <2 x i16> %a, <i16 -64, i16 4>
206  %cast = bitcast <2 x i16> %add to i32
207  ret i32 %cast
208}
209
; Scalar add of mixed constant <4, -64> — the negative immediate lands
; on the high-half s_add_i32 this time. Otherwise identical lowering to
; the _lo variant.
210define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
211; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi:
212; GFX9:       ; %bb.0:
213; GFX9-NEXT:    s_pack_ll_b32_b16 s1, 4, 0xffffffc0
214; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
215; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
216; GFX9-NEXT:    s_add_i32 s0, s0, s1
217; GFX9-NEXT:    s_add_i32 s2, s2, s3
218; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
219; GFX9-NEXT:    ; return to shader part epilog
220;
221; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
222; GFX8:       ; %bb.0:
223; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
224; GFX8-NEXT:    s_mov_b32 s2, 0xffff
225; GFX8-NEXT:    s_and_b32 s0, s0, s2
226; GFX8-NEXT:    s_and_b32 s1, s1, s2
227; GFX8-NEXT:    s_add_i32 s0, s0, 4
228; GFX8-NEXT:    s_add_i32 s1, s1, 0xffc0
229; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
230; GFX8-NEXT:    s_and_b32 s0, s0, s2
231; GFX8-NEXT:    s_or_b32 s0, s1, s0
232; GFX8-NEXT:    ; return to shader part epilog
233  %add = add <2 x i16> %a, <i16 4, i16 -64>
234  %cast = bitcast <2 x i16> %add to i32
235  ret i32 %cast
236}
237
; Scalar add of two uniform <2 x i16> values. Both targets split each
; operand into halves with s_lshr_b32 and add per half; GFX9 repacks
; with s_pack_ll_b32_b16 while GFX8 masks each half with 0xffff (no
; pack instruction) and recombines via shl/or.
238define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
239; GFX9-LABEL: s_add_v2i16:
240; GFX9:       ; %bb.0:
241; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
242; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
243; GFX9-NEXT:    s_add_i32 s0, s0, s1
244; GFX9-NEXT:    s_add_i32 s2, s2, s3
245; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
246; GFX9-NEXT:    ; return to shader part epilog
247;
248; GFX8-LABEL: s_add_v2i16:
249; GFX8:       ; %bb.0:
250; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
251; GFX8-NEXT:    s_mov_b32 s3, 0xffff
252; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
253; GFX8-NEXT:    s_and_b32 s0, s0, s3
254; GFX8-NEXT:    s_and_b32 s1, s1, s3
255; GFX8-NEXT:    s_and_b32 s2, s2, s3
256; GFX8-NEXT:    s_and_b32 s4, s4, s3
257; GFX8-NEXT:    s_add_i32 s0, s0, s1
258; GFX8-NEXT:    s_add_i32 s2, s2, s4
259; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
260; GFX8-NEXT:    s_and_b32 s0, s0, s3
261; GFX8-NEXT:    s_or_b32 s0, s1, s0
262; GFX8-NEXT:    ; return to shader part epilog
263  %add = add <2 x i16> %a, %b
264  %cast = bitcast <2 x i16> %add to i32
265  ret i32 %cast
266}
267
; Scalar version of fneg-on-LHS feeding an integer add. The scalar ALU
; has no source-negate modifier, so both targets lower the fneg to an
; explicit s_xor_b32 with the per-half sign mask 0x80008000 before the
; split-add-repack sequence.
268define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
269; GFX9-LABEL: s_add_v2i16_fneg_lhs:
270; GFX9:       ; %bb.0:
271; GFX9-NEXT:    s_xor_b32 s0, s0, 0x80008000
272; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
273; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
274; GFX9-NEXT:    s_add_i32 s0, s0, s1
275; GFX9-NEXT:    s_add_i32 s2, s2, s3
276; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
277; GFX9-NEXT:    ; return to shader part epilog
278;
279; GFX8-LABEL: s_add_v2i16_fneg_lhs:
280; GFX8:       ; %bb.0:
281; GFX8-NEXT:    s_xor_b32 s0, s0, 0x80008000
282; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
283; GFX8-NEXT:    s_mov_b32 s3, 0xffff
284; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
285; GFX8-NEXT:    s_and_b32 s0, s0, s3
286; GFX8-NEXT:    s_and_b32 s1, s1, s3
287; GFX8-NEXT:    s_and_b32 s2, s2, s3
288; GFX8-NEXT:    s_and_b32 s4, s4, s3
289; GFX8-NEXT:    s_add_i32 s0, s0, s1
290; GFX8-NEXT:    s_add_i32 s2, s2, s4
291; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
292; GFX8-NEXT:    s_and_b32 s0, s0, s3
293; GFX8-NEXT:    s_or_b32 s0, s1, s0
294; GFX8-NEXT:    ; return to shader part epilog
295  %neg.a = fneg <2 x half> %a
296  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
297  %add = add <2 x i16> %cast.neg.a, %b
298  %cast = bitcast <2 x i16> %add to i32
299  ret i32 %cast
300}
301
; Scalar version of fneg-on-RHS: same lowering as the LHS case but the
; s_xor_b32 with 0x80008000 is applied to s1 (the RHS) instead of s0.
302define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
303; GFX9-LABEL: s_add_v2i16_fneg_rhs:
304; GFX9:       ; %bb.0:
305; GFX9-NEXT:    s_xor_b32 s1, s1, 0x80008000
306; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
307; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
308; GFX9-NEXT:    s_add_i32 s0, s0, s1
309; GFX9-NEXT:    s_add_i32 s2, s2, s3
310; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
311; GFX9-NEXT:    ; return to shader part epilog
312;
313; GFX8-LABEL: s_add_v2i16_fneg_rhs:
314; GFX8:       ; %bb.0:
315; GFX8-NEXT:    s_xor_b32 s1, s1, 0x80008000
316; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
317; GFX8-NEXT:    s_mov_b32 s3, 0xffff
318; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
319; GFX8-NEXT:    s_and_b32 s0, s0, s3
320; GFX8-NEXT:    s_and_b32 s1, s1, s3
321; GFX8-NEXT:    s_and_b32 s2, s2, s3
322; GFX8-NEXT:    s_and_b32 s4, s4, s3
323; GFX8-NEXT:    s_add_i32 s0, s0, s1
324; GFX8-NEXT:    s_add_i32 s2, s2, s4
325; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
326; GFX8-NEXT:    s_and_b32 s0, s0, s3
327; GFX8-NEXT:    s_or_b32 s0, s1, s0
328; GFX8-NEXT:    ; return to shader part epilog
329  %neg.b = fneg <2 x half> %b
330  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
331  %add = add <2 x i16> %a, %cast.neg.b
332  %cast = bitcast <2 x i16> %add to i32
333  ret i32 %cast
334}
335
; Scalar version with both operands negated: the 0x80008000 sign mask is
; hoisted into s2 and used for two s_xor_b32 ops before the usual
; split/add/repack sequence on both targets.
336define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
337; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
338; GFX9:       ; %bb.0:
339; GFX9-NEXT:    s_mov_b32 s2, 0x80008000
340; GFX9-NEXT:    s_xor_b32 s1, s1, s2
341; GFX9-NEXT:    s_xor_b32 s0, s0, s2
342; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
343; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
344; GFX9-NEXT:    s_add_i32 s0, s0, s1
345; GFX9-NEXT:    s_add_i32 s2, s2, s3
346; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
347; GFX9-NEXT:    ; return to shader part epilog
348;
349; GFX8-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
350; GFX8:       ; %bb.0:
351; GFX8-NEXT:    s_mov_b32 s2, 0x80008000
352; GFX8-NEXT:    s_xor_b32 s1, s1, s2
353; GFX8-NEXT:    s_xor_b32 s0, s0, s2
354; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
355; GFX8-NEXT:    s_mov_b32 s3, 0xffff
356; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
357; GFX8-NEXT:    s_and_b32 s0, s0, s3
358; GFX8-NEXT:    s_and_b32 s1, s1, s3
359; GFX8-NEXT:    s_and_b32 s2, s2, s3
360; GFX8-NEXT:    s_and_b32 s4, s4, s3
361; GFX8-NEXT:    s_add_i32 s0, s0, s1
362; GFX8-NEXT:    s_add_i32 s2, s2, s4
363; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
364; GFX8-NEXT:    s_and_b32 s0, s0, s3
365; GFX8-NEXT:    s_or_b32 s0, s1, s0
366; GFX8-NEXT:    ; return to shader part epilog
367  %neg.a = fneg <2 x half> %a
368  %neg.b = fneg <2 x half> %b
369  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
370  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
371  %add = add <2 x i16> %cast.neg.a, %cast.neg.b
372  %cast = bitcast <2 x i16> %add to i32
373  ret i32 %cast
374}
375