1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6
; s_fshr_i7: funnel shift right (llvm.fshr.i7) with all three operands passed
; inreg to an amdgpu_ps function.
; In the checked output for every target the shift amount is masked to 7 bits
; and then reduced by 7 with a reciprocal-based remainder sequence
; (v_rcp_iflag_f32, v_mul_hi_u32, then two compare/select subtract-7
; corrections). The result is the OR of (lhs << 1) shifted left by
; (6 - remainder) and the 7-bit-masked rhs shifted right by the remainder,
; copied back to s0 with v_readfirstlane_b32.
7define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
8; GFX6-LABEL: s_fshr_i7:
9; GFX6:       ; %bb.0:
10; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
11; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
12; GFX6-NEXT:    s_movk_i32 s3, 0x7f
13; GFX6-NEXT:    s_and_b32 s2, s2, s3
14; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
15; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
16; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
17; GFX6-NEXT:    s_and_b32 s1, s1, s3
18; GFX6-NEXT:    v_mul_lo_u32 v1, -7, v0
19; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
20; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
21; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
22; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 7
23; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
24; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
25; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
26; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
27; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 7, v0
28; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
29; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
30; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 6, v0
31; GFX6-NEXT:    v_and_b32_e32 v0, s3, v0
32; GFX6-NEXT:    v_and_b32_e32 v1, s3, v1
33; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
34; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
35; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
36; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
37; GFX6-NEXT:    ; return to shader part epilog
38;
39; GFX8-LABEL: s_fshr_i7:
40; GFX8:       ; %bb.0:
41; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
42; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
43; GFX8-NEXT:    s_movk_i32 s3, 0x7f
44; GFX8-NEXT:    s_and_b32 s2, s2, s3
45; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
46; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
47; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
48; GFX8-NEXT:    s_and_b32 s1, s1, s3
49; GFX8-NEXT:    v_mul_lo_u32 v1, -7, v0
50; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
51; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
52; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
53; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 7
54; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
55; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
56; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
57; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
58; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 7, v0
59; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
60; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
61; GFX8-NEXT:    v_sub_u16_e32 v1, 6, v0
62; GFX8-NEXT:    v_and_b32_e32 v0, s3, v0
63; GFX8-NEXT:    v_and_b32_e32 v1, s3, v1
64; GFX8-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
65; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
66; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
67; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
68; GFX8-NEXT:    ; return to shader part epilog
69;
70; GFX9-LABEL: s_fshr_i7:
71; GFX9:       ; %bb.0:
72; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
73; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
74; GFX9-NEXT:    s_movk_i32 s3, 0x7f
75; GFX9-NEXT:    s_and_b32 s2, s2, s3
76; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
77; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
78; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
79; GFX9-NEXT:    s_and_b32 s1, s1, s3
80; GFX9-NEXT:    v_mul_lo_u32 v1, -7, v0
81; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
82; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
83; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
84; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 7
85; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
86; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
87; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
88; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
89; GFX9-NEXT:    v_subrev_u32_e32 v1, 7, v0
90; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v0
91; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
92; GFX9-NEXT:    v_sub_u16_e32 v1, 6, v0
93; GFX9-NEXT:    v_and_b32_e32 v0, s3, v0
94; GFX9-NEXT:    v_and_b32_e32 v1, s3, v1
95; GFX9-NEXT:    v_lshlrev_b16_e64 v1, v1, s0
96; GFX9-NEXT:    v_lshrrev_b16_e64 v0, v0, s1
97; GFX9-NEXT:    v_or_b32_e32 v0, v1, v0
98; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
99; GFX9-NEXT:    ; return to shader part epilog
100;
101; GFX10-LABEL: s_fshr_i7:
102; GFX10:       ; %bb.0:
103; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 7
104; GFX10-NEXT:    s_movk_i32 s3, 0x7f
105; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
106; GFX10-NEXT:    s_and_b32 s2, s2, s3
107; GFX10-NEXT:    s_and_b32 s1, s1, s3
108; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
109; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
110; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
111; GFX10-NEXT:    v_mul_lo_u32 v1, -7, v0
112; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
113; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
114; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
115; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 7
116; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
117; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
118; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
119; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
120; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 7, v0
121; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v0
122; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
123; GFX10-NEXT:    v_sub_nc_u16 v1, 6, v0
124; GFX10-NEXT:    v_and_b32_e32 v0, s3, v0
125; GFX10-NEXT:    v_and_b32_e32 v1, s3, v1
126; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
127; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
128; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
129; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
130; GFX10-NEXT:    ; return to shader part epilog
131  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
132  ret i7 %result
133}
134
; v_fshr_i7: the same llvm.fshr.i7 call as s_fshr_i7, but with plain (non-inreg)
; arguments and no amdgpu_ps calling convention.
; The checked output takes lhs/rhs/amt in v0/v1/v2, reduces the 7-bit-masked
; amount by 7 with the same reciprocal-based remainder sequence, leaves the
; result in v0 and returns with s_setpc_b64 s[30:31].
; GFX6 uses 32-bit shifts throughout; GFX8/GFX9/GFX10 use 16-bit shift and
; subtract instructions for the final steps.
135define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
136; GFX6-LABEL: v_fshr_i7:
137; GFX6:       ; %bb.0:
138; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
140; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
141; GFX6-NEXT:    v_and_b32_e32 v2, 0x7f, v2
142; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
143; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
144; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
145; GFX6-NEXT:    v_mul_lo_u32 v4, -7, v3
146; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
147; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
148; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
149; GFX6-NEXT:    v_mov_b32_e32 v4, 0x7f
150; GFX6-NEXT:    v_and_b32_e32 v1, v1, v4
151; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 7
152; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
153; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
154; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
155; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
156; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 7, v2
157; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
158; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
159; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 6, v2
160; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
161; GFX6-NEXT:    v_and_b32_e32 v3, v3, v4
162; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
163; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
164; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
165; GFX6-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX8-LABEL: v_fshr_i7:
168; GFX8:       ; %bb.0:
169; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
171; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
172; GFX8-NEXT:    v_and_b32_e32 v2, 0x7f, v2
173; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
174; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
175; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
176; GFX8-NEXT:    v_mul_lo_u32 v4, -7, v3
177; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
178; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
179; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
180; GFX8-NEXT:    v_mov_b32_e32 v4, 0x7f
181; GFX8-NEXT:    v_and_b32_e32 v1, v1, v4
182; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 7
183; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
184; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
185; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
186; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
187; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 7, v2
188; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
189; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
190; GFX8-NEXT:    v_sub_u16_e32 v3, 6, v2
191; GFX8-NEXT:    v_and_b32_e32 v2, v2, v4
192; GFX8-NEXT:    v_and_b32_e32 v3, v3, v4
193; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
194; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
195; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
196; GFX8-NEXT:    s_setpc_b64 s[30:31]
197;
198; GFX9-LABEL: v_fshr_i7:
199; GFX9:       ; %bb.0:
200; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
202; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
203; GFX9-NEXT:    v_and_b32_e32 v2, 0x7f, v2
204; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
205; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
206; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
207; GFX9-NEXT:    v_mul_lo_u32 v4, -7, v3
208; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
209; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
210; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
211; GFX9-NEXT:    v_mov_b32_e32 v4, 0x7f
212; GFX9-NEXT:    v_and_b32_e32 v1, v1, v4
213; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 7
214; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
215; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
216; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
217; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
218; GFX9-NEXT:    v_subrev_u32_e32 v3, 7, v2
219; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 7, v2
220; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
221; GFX9-NEXT:    v_sub_u16_e32 v3, 6, v2
222; GFX9-NEXT:    v_and_b32_e32 v2, v2, v4
223; GFX9-NEXT:    v_and_b32_e32 v3, v3, v4
224; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
225; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v2, v1
226; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
227; GFX9-NEXT:    s_setpc_b64 s[30:31]
228;
229; GFX10-LABEL: v_fshr_i7:
230; GFX10:       ; %bb.0:
231; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
233; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 7
234; GFX10-NEXT:    v_and_b32_e32 v2, 0x7f, v2
235; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
236; GFX10-NEXT:    v_and_b32_e32 v1, 0x7f, v1
237; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
238; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
239; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
240; GFX10-NEXT:    v_mul_lo_u32 v4, -7, v3
241; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
242; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
243; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
244; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 7
245; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
246; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
247; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
248; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
249; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 7, v2
250; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 7, v2
251; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
252; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7f
253; GFX10-NEXT:    v_sub_nc_u16 v4, 6, v2
254; GFX10-NEXT:    v_and_b32_e32 v2, v2, v3
255; GFX10-NEXT:    v_and_b32_e32 v4, v4, v3
256; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
257; GFX10-NEXT:    v_lshlrev_b16 v0, v4, v0
258; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
259; GFX10-NEXT:    s_setpc_b64 s[30:31]
260  %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
261  ret i7 %result
262}
263
; s_fshr_i8: funnel shift right (llvm.fshr.i8) with a variable amount and all
; operands inreg in an amdgpu_ps function.
; The checked output needs no remainder sequence: the right-shift amount is
; amt & 7 and the left-shift amount is 7 & ~amt (s_andn2_b32), applied to
; lhs << 1. rhs is masked with 0xff before the right shift; GFX8/GFX9/GFX10
; additionally emit s_bfe_u32 with 0x100000 on the masked rhs.
; The whole sequence stays in scalar instructions and the result is in s0.
264define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
265; GFX6-LABEL: s_fshr_i8:
266; GFX6:       ; %bb.0:
267; GFX6-NEXT:    s_and_b32 s3, s2, 7
268; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
269; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
270; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
271; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
272; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
273; GFX6-NEXT:    s_or_b32 s0, s0, s1
274; GFX6-NEXT:    ; return to shader part epilog
275;
276; GFX8-LABEL: s_fshr_i8:
277; GFX8:       ; %bb.0:
278; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
279; GFX8-NEXT:    s_and_b32 s3, s2, 7
280; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
281; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
282; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
283; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
284; GFX8-NEXT:    s_lshr_b32 s1, s1, s3
285; GFX8-NEXT:    s_or_b32 s0, s0, s1
286; GFX8-NEXT:    ; return to shader part epilog
287;
288; GFX9-LABEL: s_fshr_i8:
289; GFX9:       ; %bb.0:
290; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
291; GFX9-NEXT:    s_and_b32 s3, s2, 7
292; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
293; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
294; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
295; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
296; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
297; GFX9-NEXT:    s_or_b32 s0, s0, s1
298; GFX9-NEXT:    ; return to shader part epilog
299;
300; GFX10-LABEL: s_fshr_i8:
301; GFX10:       ; %bb.0:
302; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
303; GFX10-NEXT:    s_and_b32 s3, s2, 7
304; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
305; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
306; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
307; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
308; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
309; GFX10-NEXT:    s_or_b32 s0, s0, s1
310; GFX10-NEXT:    ; return to shader part epilog
311  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
312  ret i8 %result
313}
314
; v_fshr_i8: llvm.fshr.i8 with a variable amount and plain (non-inreg)
; arguments; the checked output takes them in v0/v1/v2 and returns in v0.
; The right-shift amount is amt & 7 and the left-shift amount is (~amt) & 7,
; formed with v_xor_b32 against -1 followed by v_and_b32.
; GFX6 and GFX10 mask rhs with 0xff explicitly; GFX8 and GFX9 instead select
; BYTE_0 of rhs through the SDWA form of v_lshrrev_b16.
315define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) {
316; GFX6-LABEL: v_fshr_i8:
317; GFX6:       ; %bb.0:
318; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; GFX6-NEXT:    v_and_b32_e32 v3, 7, v2
320; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
321; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
322; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
323; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
324; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
325; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
326; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
327; GFX6-NEXT:    s_setpc_b64 s[30:31]
328;
329; GFX8-LABEL: v_fshr_i8:
330; GFX8:       ; %bb.0:
331; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332; GFX8-NEXT:    v_and_b32_e32 v3, 7, v2
333; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
334; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
335; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
336; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
337; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
338; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
339; GFX8-NEXT:    s_setpc_b64 s[30:31]
340;
341; GFX9-LABEL: v_fshr_i8:
342; GFX9:       ; %bb.0:
343; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX9-NEXT:    v_and_b32_e32 v3, 7, v2
345; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
346; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
347; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
348; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
349; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
350; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
351; GFX9-NEXT:    s_setpc_b64 s[30:31]
352;
353; GFX10-LABEL: v_fshr_i8:
354; GFX10:       ; %bb.0:
355; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
357; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
358; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
359; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
360; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
361; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
362; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
363; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
364; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
365; GFX10-NEXT:    s_setpc_b64 s[30:31]
366  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
367  ret i8 %result
368}
369
; s_fshr_i8_4: llvm.fshr.i8 with the constant shift amount 4 and inreg
; operands in an amdgpu_ps function.
; With the amount known, the checked output folds both shifts to immediates:
; s0 = (lhs << 4) | ((rhs & 0xff) >> 4). GFX8/GFX9/GFX10 also emit
; s_bfe_u32 with 0x100000 on the masked rhs before the right shift.
370define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
371; GFX6-LABEL: s_fshr_i8_4:
372; GFX6:       ; %bb.0:
373; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
374; GFX6-NEXT:    s_lshl_b32 s0, s0, 4
375; GFX6-NEXT:    s_lshr_b32 s1, s1, 4
376; GFX6-NEXT:    s_or_b32 s0, s0, s1
377; GFX6-NEXT:    ; return to shader part epilog
378;
379; GFX8-LABEL: s_fshr_i8_4:
380; GFX8:       ; %bb.0:
381; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
382; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
383; GFX8-NEXT:    s_lshl_b32 s0, s0, 4
384; GFX8-NEXT:    s_lshr_b32 s1, s1, 4
385; GFX8-NEXT:    s_or_b32 s0, s0, s1
386; GFX8-NEXT:    ; return to shader part epilog
387;
388; GFX9-LABEL: s_fshr_i8_4:
389; GFX9:       ; %bb.0:
390; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
391; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
392; GFX9-NEXT:    s_lshl_b32 s0, s0, 4
393; GFX9-NEXT:    s_lshr_b32 s1, s1, 4
394; GFX9-NEXT:    s_or_b32 s0, s0, s1
395; GFX9-NEXT:    ; return to shader part epilog
396;
397; GFX10-LABEL: s_fshr_i8_4:
398; GFX10:       ; %bb.0:
399; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
400; GFX10-NEXT:    s_lshl_b32 s0, s0, 4
401; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
402; GFX10-NEXT:    s_lshr_b32 s1, s1, 4
403; GFX10-NEXT:    s_or_b32 s0, s0, s1
404; GFX10-NEXT:    ; return to shader part epilog
405  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
406  ret i8 %result
407}
408
; v_fshr_i8_4: llvm.fshr.i8 with the constant shift amount 4 and plain
; (non-inreg) arguments; the result is (lhs << 4) | (zero-extended rhs >> 4)
; in v0.
; GFX6 and GFX10 mask rhs with 0xff and use immediate shift amounts.
; GFX8 and GFX9 use the SDWA form of v_lshrrev_b16 with BYTE_0 selected from
; rhs, and first place the constant 4 in a register: v2 via v_mov_b32 on GFX8,
; s4 via s_mov_b32 on GFX9.
409define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) {
410; GFX6-LABEL: v_fshr_i8_4:
411; GFX6:       ; %bb.0:
412; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
414; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
415; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 4, v1
416; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
417; GFX6-NEXT:    s_setpc_b64 s[30:31]
418;
419; GFX8-LABEL: v_fshr_i8_4:
420; GFX8:       ; %bb.0:
421; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX8-NEXT:    v_mov_b32_e32 v2, 4
423; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
424; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
425; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
426; GFX8-NEXT:    s_setpc_b64 s[30:31]
427;
428; GFX9-LABEL: v_fshr_i8_4:
429; GFX9:       ; %bb.0:
430; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX9-NEXT:    s_mov_b32 s4, 4
432; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 4, v0
433; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
434; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
435; GFX9-NEXT:    s_setpc_b64 s[30:31]
436;
437; GFX10-LABEL: v_fshr_i8_4:
438; GFX10:       ; %bb.0:
439; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
441; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
442; GFX10-NEXT:    v_lshlrev_b16 v0, 4, v0
443; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
444; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
445; GFX10-NEXT:    s_setpc_b64 s[30:31]
446  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
447  ret i8 %result
448}
449
; s_fshr_i8_5: llvm.fshr.i8 with the constant shift amount 5 and inreg
; operands in an amdgpu_ps function.
; Unlike the amount-4 case the two immediate shifts are asymmetric: the
; checked output is s0 = (lhs << 3) | ((rhs & 0xff) >> 5). GFX8/GFX9/GFX10
; also emit s_bfe_u32 with 0x100000 on the masked rhs before the right shift.
450define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
451; GFX6-LABEL: s_fshr_i8_5:
452; GFX6:       ; %bb.0:
453; GFX6-NEXT:    s_and_b32 s1, s1, 0xff
454; GFX6-NEXT:    s_lshl_b32 s0, s0, 3
455; GFX6-NEXT:    s_lshr_b32 s1, s1, 5
456; GFX6-NEXT:    s_or_b32 s0, s0, s1
457; GFX6-NEXT:    ; return to shader part epilog
458;
459; GFX8-LABEL: s_fshr_i8_5:
460; GFX8:       ; %bb.0:
461; GFX8-NEXT:    s_and_b32 s1, s1, 0xff
462; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
463; GFX8-NEXT:    s_lshl_b32 s0, s0, 3
464; GFX8-NEXT:    s_lshr_b32 s1, s1, 5
465; GFX8-NEXT:    s_or_b32 s0, s0, s1
466; GFX8-NEXT:    ; return to shader part epilog
467;
468; GFX9-LABEL: s_fshr_i8_5:
469; GFX9:       ; %bb.0:
470; GFX9-NEXT:    s_and_b32 s1, s1, 0xff
471; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
472; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
473; GFX9-NEXT:    s_lshr_b32 s1, s1, 5
474; GFX9-NEXT:    s_or_b32 s0, s0, s1
475; GFX9-NEXT:    ; return to shader part epilog
476;
477; GFX10-LABEL: s_fshr_i8_5:
478; GFX10:       ; %bb.0:
479; GFX10-NEXT:    s_and_b32 s1, s1, 0xff
480; GFX10-NEXT:    s_lshl_b32 s0, s0, 3
481; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
482; GFX10-NEXT:    s_lshr_b32 s1, s1, 5
483; GFX10-NEXT:    s_or_b32 s0, s0, s1
484; GFX10-NEXT:    ; return to shader part epilog
485  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
486  ret i8 %result
487}
488
; v_fshr_i8_5: llvm.fshr.i8 with the constant shift amount 5 and plain
; (non-inreg) arguments; the result is (lhs << 3) | (zero-extended rhs >> 5)
; in v0.
; GFX6 and GFX10 mask rhs with 0xff and use immediate shift amounts.
; GFX8 and GFX9 both load the constant 5 into v2 with v_mov_b32 and use the
; SDWA form of v_lshrrev_b16 with BYTE_0 selected from rhs.
489define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) {
490; GFX6-LABEL: v_fshr_i8_5:
491; GFX6:       ; %bb.0:
492; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
494; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
495; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 5, v1
496; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
497; GFX6-NEXT:    s_setpc_b64 s[30:31]
498;
499; GFX8-LABEL: v_fshr_i8_5:
500; GFX8:       ; %bb.0:
501; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX8-NEXT:    v_mov_b32_e32 v2, 5
503; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
504; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
505; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
506; GFX8-NEXT:    s_setpc_b64 s[30:31]
507;
508; GFX9-LABEL: v_fshr_i8_5:
509; GFX9:       ; %bb.0:
510; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511; GFX9-NEXT:    v_mov_b32_e32 v2, 5
512; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 3, v0
513; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
514; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
515; GFX9-NEXT:    s_setpc_b64 s[30:31]
516;
517; GFX10-LABEL: v_fshr_i8_5:
518; GFX10:       ; %bb.0:
519; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
520; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
521; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
522; GFX10-NEXT:    v_lshlrev_b16 v0, 3, v0
523; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
524; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
525; GFX10-NEXT:    s_setpc_b64 s[30:31]
526  %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
527  ret i8 %result
528}
529
; s_fshr_v2i8: llvm.fshr.v2i8 with inreg operands in an amdgpu_ps function.
; The <2 x i8> operands and result travel as i16 values and are bitcast in
; the IR body.
; The checked output splits each operand into its two bytes (s_lshr_b32 by 8,
; or s_bfe_u32 with 0x80008 on GFX6), runs the variable-amount i8 sequence
; (amt & 7, 7 & ~amt, lhs << 1) on each byte, then masks both results with
; 0xff and recombines them as low | (high << 8) in s0.
; On GFX8/GFX9/GFX10 the final shift amount 8 is itself produced by
; s_bfe_u32 s2, 8, 0x100000 rather than used as an immediate.
530define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
531; GFX6-LABEL: s_fshr_v2i8:
532; GFX6:       ; %bb.0:
533; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
534; GFX6-NEXT:    s_lshr_b32 s4, s2, 8
535; GFX6-NEXT:    s_and_b32 s5, s2, 7
536; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
537; GFX6-NEXT:    s_movk_i32 s6, 0xff
538; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
539; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
540; GFX6-NEXT:    s_and_b32 s2, s1, s6
541; GFX6-NEXT:    s_lshr_b32 s2, s2, s5
542; GFX6-NEXT:    s_or_b32 s0, s0, s2
543; GFX6-NEXT:    s_and_b32 s2, s4, 7
544; GFX6-NEXT:    s_andn2_b32 s4, 7, s4
545; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
546; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80008
547; GFX6-NEXT:    s_lshl_b32 s3, s3, s4
548; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
549; GFX6-NEXT:    s_or_b32 s1, s3, s1
550; GFX6-NEXT:    s_and_b32 s1, s1, s6
551; GFX6-NEXT:    s_and_b32 s0, s0, s6
552; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
553; GFX6-NEXT:    s_or_b32 s0, s0, s1
554; GFX6-NEXT:    ; return to shader part epilog
555;
556; GFX8-LABEL: s_fshr_v2i8:
557; GFX8:       ; %bb.0:
558; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
559; GFX8-NEXT:    s_lshr_b32 s5, s2, 8
560; GFX8-NEXT:    s_and_b32 s6, s2, 7
561; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
562; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
563; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
564; GFX8-NEXT:    s_movk_i32 s2, 0xff
565; GFX8-NEXT:    s_lshr_b32 s4, s1, 8
566; GFX8-NEXT:    s_and_b32 s1, s1, s2
567; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
568; GFX8-NEXT:    s_lshr_b32 s1, s1, s6
569; GFX8-NEXT:    s_and_b32 s4, s4, s2
570; GFX8-NEXT:    s_or_b32 s0, s0, s1
571; GFX8-NEXT:    s_and_b32 s1, s5, 7
572; GFX8-NEXT:    s_andn2_b32 s5, 7, s5
573; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
574; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
575; GFX8-NEXT:    s_lshl_b32 s3, s3, s5
576; GFX8-NEXT:    s_lshr_b32 s1, s4, s1
577; GFX8-NEXT:    s_or_b32 s1, s3, s1
578; GFX8-NEXT:    s_and_b32 s0, s0, s2
579; GFX8-NEXT:    s_and_b32 s1, s1, s2
580; GFX8-NEXT:    s_bfe_u32 s2, 8, 0x100000
581; GFX8-NEXT:    s_lshl_b32 s1, s1, s2
582; GFX8-NEXT:    s_or_b32 s0, s0, s1
583; GFX8-NEXT:    ; return to shader part epilog
584;
585; GFX9-LABEL: s_fshr_v2i8:
586; GFX9:       ; %bb.0:
587; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
588; GFX9-NEXT:    s_lshr_b32 s5, s2, 8
589; GFX9-NEXT:    s_and_b32 s6, s2, 7
590; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
591; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
592; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
593; GFX9-NEXT:    s_movk_i32 s2, 0xff
594; GFX9-NEXT:    s_lshr_b32 s4, s1, 8
595; GFX9-NEXT:    s_and_b32 s1, s1, s2
596; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
597; GFX9-NEXT:    s_lshr_b32 s1, s1, s6
598; GFX9-NEXT:    s_and_b32 s4, s4, s2
599; GFX9-NEXT:    s_or_b32 s0, s0, s1
600; GFX9-NEXT:    s_and_b32 s1, s5, 7
601; GFX9-NEXT:    s_andn2_b32 s5, 7, s5
602; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
603; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
604; GFX9-NEXT:    s_lshl_b32 s3, s3, s5
605; GFX9-NEXT:    s_lshr_b32 s1, s4, s1
606; GFX9-NEXT:    s_or_b32 s1, s3, s1
607; GFX9-NEXT:    s_and_b32 s0, s0, s2
608; GFX9-NEXT:    s_and_b32 s1, s1, s2
609; GFX9-NEXT:    s_bfe_u32 s2, 8, 0x100000
610; GFX9-NEXT:    s_lshl_b32 s1, s1, s2
611; GFX9-NEXT:    s_or_b32 s0, s0, s1
612; GFX9-NEXT:    ; return to shader part epilog
613;
614; GFX10-LABEL: s_fshr_v2i8:
615; GFX10:       ; %bb.0:
616; GFX10-NEXT:    s_lshr_b32 s4, s1, 8
617; GFX10-NEXT:    s_movk_i32 s7, 0xff
618; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
619; GFX10-NEXT:    s_lshr_b32 s5, s2, 8
620; GFX10-NEXT:    s_and_b32 s6, s2, 7
621; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
622; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
623; GFX10-NEXT:    s_and_b32 s4, s4, s7
624; GFX10-NEXT:    s_and_b32 s1, s1, s7
625; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
626; GFX10-NEXT:    s_and_b32 s2, s5, 7
627; GFX10-NEXT:    s_andn2_b32 s5, 7, s5
628; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
629; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
630; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
631; GFX10-NEXT:    s_lshl_b32 s3, s3, s5
632; GFX10-NEXT:    s_lshr_b32 s2, s4, s2
633; GFX10-NEXT:    s_lshr_b32 s1, s1, s6
634; GFX10-NEXT:    s_or_b32 s2, s3, s2
635; GFX10-NEXT:    s_or_b32 s0, s0, s1
636; GFX10-NEXT:    s_and_b32 s1, s2, s7
637; GFX10-NEXT:    s_bfe_u32 s2, 8, 0x100000
638; GFX10-NEXT:    s_and_b32 s0, s0, s7
639; GFX10-NEXT:    s_lshl_b32 s1, s1, s2
640; GFX10-NEXT:    s_or_b32 s0, s0, s1
641; GFX10-NEXT:    ; return to shader part epilog
642  %lhs = bitcast i16 %lhs.arg to <2 x i8>
643  %rhs = bitcast i16 %rhs.arg to <2 x i8>
644  %amt = bitcast i16 %amt.arg to <2 x i8>
645  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
646  %cast.result = bitcast <2 x i8> %result to i16
647  ret i16 %cast.result
648}
649
; v_fshr_v2i8: llvm.fshr.v2i8 with plain (non-inreg) arguments. The <2 x i8>
; operands and result travel as i16 values and are bitcast in the IR body.
; The checked output extracts the high byte of each operand with
; v_lshrrev_b32 by 8 (GFX6 uses v_bfe_u32 v1, v1, 8, 8 for rhs), runs the
; variable-amount i8 sequence (amt & 7, (~amt) & 7, lhs << 1) on each byte,
; and recombines the two byte results into v0.
; GFX6 repacks with explicit 0xff masks and a shift by 8; GFX8/GFX9 end with
; an SDWA v_or_b32 selecting BYTE_0 of the low result; GFX10 writes the high
; result through dst_sel:BYTE_1 on an SDWA v_and_b32 before the final SDWA OR.
650define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
651; GFX6-LABEL: v_fshr_v2i8:
652; GFX6:       ; %bb.0:
653; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 8, v2
655; GFX6-NEXT:    v_and_b32_e32 v5, 7, v2
656; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
657; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
658; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
659; GFX6-NEXT:    s_movk_i32 s4, 0xff
660; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
661; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
662; GFX6-NEXT:    v_and_b32_e32 v2, s4, v1
663; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v5, v2
664; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
665; GFX6-NEXT:    v_and_b32_e32 v2, 7, v4
666; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
667; GFX6-NEXT:    v_and_b32_e32 v4, 7, v4
668; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
669; GFX6-NEXT:    v_bfe_u32 v1, v1, 8, 8
670; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
671; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
672; GFX6-NEXT:    v_or_b32_e32 v1, v3, v1
673; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
674; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
675; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
676; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
677; GFX6-NEXT:    s_setpc_b64 s[30:31]
678;
679; GFX8-LABEL: v_fshr_v2i8:
680; GFX8:       ; %bb.0:
681; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
683; GFX8-NEXT:    v_and_b32_e32 v6, 7, v2
684; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
685; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
686; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
687; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
688; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
689; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
690; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
691; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v5
692; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
693; GFX8-NEXT:    v_and_b32_e32 v1, 7, v5
694; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
695; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
696; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v3
697; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
698; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
699; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
700; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
701; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
702; GFX8-NEXT:    s_setpc_b64 s[30:31]
703;
704; GFX9-LABEL: v_fshr_v2i8:
705; GFX9:       ; %bb.0:
706; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
708; GFX9-NEXT:    v_and_b32_e32 v6, 7, v2
709; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
710; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
711; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
712; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
713; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
714; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
715; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
716; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v5
717; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
718; GFX9-NEXT:    v_and_b32_e32 v1, 7, v5
719; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
720; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
721; GFX9-NEXT:    v_lshlrev_b16_e32 v2, v2, v3
722; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
723; GFX9-NEXT:    v_or_b32_e32 v1, v2, v1
724; GFX9-NEXT:    v_and_b32_e32 v1, 0xff, v1
725; GFX9-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
726; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
727; GFX9-NEXT:    s_setpc_b64 s[30:31]
728;
729; GFX10-LABEL: v_fshr_v2i8:
730; GFX10:       ; %bb.0:
731; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
733; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v2
734; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 8, v0
735; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
736; GFX10-NEXT:    s_movk_i32 s4, 0xff
737; GFX10-NEXT:    v_and_b32_e32 v7, 7, v2
738; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v3
739; GFX10-NEXT:    v_xor_b32_e32 v2, -1, v2
740; GFX10-NEXT:    v_and_b32_e32 v3, 7, v3
741; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
742; GFX10-NEXT:    v_and_b32_e32 v5, s4, v5
743; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
744; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
745; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
746; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
747; GFX10-NEXT:    v_lshrrev_b16 v3, v3, v5
748; GFX10-NEXT:    v_lshlrev_b16 v4, v6, v4
749; GFX10-NEXT:    v_lshrrev_b16 v1, v7, v1
750; GFX10-NEXT:    v_lshlrev_b16 v0, v2, v0
751; GFX10-NEXT:    v_or_b32_e32 v2, v4, v3
752; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
753; GFX10-NEXT:    v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
754; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
755; GFX10-NEXT:    s_setpc_b64 s[30:31]
756  %lhs = bitcast i16 %lhs.arg to <2 x i8>
757  %rhs = bitcast i16 %rhs.arg to <2 x i8>
758  %amt = bitcast i16 %amt.arg to <2 x i8>
759  %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
760  %cast.result = bitcast <2 x i8> %result to i16
761  ret i16 %cast.result
762}
763
; s_fshr_v4i8 -- funnel-shift-right of four packed i8 lanes with all operands
; passed inreg to an amdgpu_ps function.
; The three i32 arguments are reinterpreted as <4 x i8>, fed to llvm.fshr.v4i8,
; and the <4 x i8> result is returned repacked as an i32.
; The expected output for every checked target consists of scalar (s_*)
; instructions only. Per lane, the amount is masked with 7 for the right shift
; of the rhs byte and inverted-and-masked (s_andn2_b32 7, amt) for the left
; shift of (lhs << 1); the four lane results are then masked with 0xff and
; merged using shifts of 8, 16 and 24.
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
764define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
765; GFX6-LABEL: s_fshr_v4i8:
766; GFX6:       ; %bb.0:
767; GFX6-NEXT:    s_lshr_b32 s3, s0, 8
768; GFX6-NEXT:    s_lshr_b32 s4, s0, 16
769; GFX6-NEXT:    s_lshr_b32 s5, s0, 24
770; GFX6-NEXT:    s_lshr_b32 s7, s2, 8
771; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
772; GFX6-NEXT:    s_lshr_b32 s9, s2, 24
773; GFX6-NEXT:    s_and_b32 s10, s2, 7
774; GFX6-NEXT:    s_andn2_b32 s2, 7, s2
775; GFX6-NEXT:    s_movk_i32 s11, 0xff
776; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
777; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
778; GFX6-NEXT:    s_and_b32 s2, s1, s11
779; GFX6-NEXT:    s_lshr_b32 s2, s2, s10
780; GFX6-NEXT:    s_or_b32 s0, s0, s2
781; GFX6-NEXT:    s_and_b32 s2, s7, 7
782; GFX6-NEXT:    s_andn2_b32 s7, 7, s7
783; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
784; GFX6-NEXT:    s_lshl_b32 s3, s3, s7
785; GFX6-NEXT:    s_bfe_u32 s7, s1, 0x80008
786; GFX6-NEXT:    s_lshr_b32 s2, s7, s2
787; GFX6-NEXT:    s_lshr_b32 s6, s1, 24
788; GFX6-NEXT:    s_or_b32 s2, s3, s2
789; GFX6-NEXT:    s_and_b32 s3, s8, 7
790; GFX6-NEXT:    s_andn2_b32 s7, 7, s8
791; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
792; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x80010
793; GFX6-NEXT:    s_lshl_b32 s4, s4, s7
794; GFX6-NEXT:    s_lshr_b32 s1, s1, s3
795; GFX6-NEXT:    s_or_b32 s1, s4, s1
796; GFX6-NEXT:    s_and_b32 s3, s9, 7
797; GFX6-NEXT:    s_andn2_b32 s4, 7, s9
798; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
799; GFX6-NEXT:    s_and_b32 s2, s2, s11
800; GFX6-NEXT:    s_lshl_b32 s4, s5, s4
801; GFX6-NEXT:    s_lshr_b32 s3, s6, s3
802; GFX6-NEXT:    s_and_b32 s0, s0, s11
803; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
804; GFX6-NEXT:    s_and_b32 s1, s1, s11
805; GFX6-NEXT:    s_or_b32 s3, s4, s3
806; GFX6-NEXT:    s_or_b32 s0, s0, s2
807; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
808; GFX6-NEXT:    s_or_b32 s0, s0, s1
809; GFX6-NEXT:    s_and_b32 s1, s3, s11
810; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
811; GFX6-NEXT:    s_or_b32 s0, s0, s1
812; GFX6-NEXT:    ; return to shader part epilog
813;
814; GFX8-LABEL: s_fshr_v4i8:
815; GFX8:       ; %bb.0:
816; GFX8-NEXT:    s_movk_i32 s13, 0xff
817; GFX8-NEXT:    s_lshr_b32 s3, s0, 8
818; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
819; GFX8-NEXT:    s_lshr_b32 s5, s0, 24
820; GFX8-NEXT:    s_lshr_b32 s6, s1, 8
821; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
822; GFX8-NEXT:    s_lshr_b32 s8, s1, 24
823; GFX8-NEXT:    s_lshr_b32 s9, s2, 8
824; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
825; GFX8-NEXT:    s_lshr_b32 s11, s2, 24
826; GFX8-NEXT:    s_and_b32 s12, s2, 7
827; GFX8-NEXT:    s_andn2_b32 s2, 7, s2
828; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
829; GFX8-NEXT:    s_and_b32 s1, s1, s13
830; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
831; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
832; GFX8-NEXT:    s_andn2_b32 s2, 7, s9
833; GFX8-NEXT:    s_lshl_b32 s3, s3, 1
834; GFX8-NEXT:    s_lshr_b32 s1, s1, s12
835; GFX8-NEXT:    s_lshl_b32 s2, s3, s2
836; GFX8-NEXT:    s_and_b32 s3, s6, s13
837; GFX8-NEXT:    s_or_b32 s0, s0, s1
838; GFX8-NEXT:    s_and_b32 s1, s9, 7
839; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
840; GFX8-NEXT:    s_lshr_b32 s1, s3, s1
841; GFX8-NEXT:    s_andn2_b32 s3, 7, s10
842; GFX8-NEXT:    s_lshl_b32 s4, s4, 1
843; GFX8-NEXT:    s_lshl_b32 s3, s4, s3
844; GFX8-NEXT:    s_and_b32 s4, s7, s13
845; GFX8-NEXT:    s_or_b32 s1, s2, s1
846; GFX8-NEXT:    s_and_b32 s2, s10, 7
847; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
848; GFX8-NEXT:    s_lshr_b32 s2, s4, s2
849; GFX8-NEXT:    s_and_b32 s1, s1, s13
850; GFX8-NEXT:    s_or_b32 s2, s3, s2
851; GFX8-NEXT:    s_and_b32 s3, s11, 7
852; GFX8-NEXT:    s_andn2_b32 s4, 7, s11
853; GFX8-NEXT:    s_lshl_b32 s5, s5, 1
854; GFX8-NEXT:    s_and_b32 s0, s0, s13
855; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
856; GFX8-NEXT:    s_lshl_b32 s4, s5, s4
857; GFX8-NEXT:    s_lshr_b32 s3, s8, s3
858; GFX8-NEXT:    s_or_b32 s0, s0, s1
859; GFX8-NEXT:    s_and_b32 s1, s2, s13
860; GFX8-NEXT:    s_or_b32 s3, s4, s3
861; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
862; GFX8-NEXT:    s_or_b32 s0, s0, s1
863; GFX8-NEXT:    s_and_b32 s1, s3, s13
864; GFX8-NEXT:    s_lshl_b32 s1, s1, 24
865; GFX8-NEXT:    s_or_b32 s0, s0, s1
866; GFX8-NEXT:    ; return to shader part epilog
867;
868; GFX9-LABEL: s_fshr_v4i8:
869; GFX9:       ; %bb.0:
870; GFX9-NEXT:    s_movk_i32 s13, 0xff
871; GFX9-NEXT:    s_lshr_b32 s3, s0, 8
872; GFX9-NEXT:    s_lshr_b32 s4, s0, 16
873; GFX9-NEXT:    s_lshr_b32 s5, s0, 24
874; GFX9-NEXT:    s_lshr_b32 s6, s1, 8
875; GFX9-NEXT:    s_lshr_b32 s7, s1, 16
876; GFX9-NEXT:    s_lshr_b32 s8, s1, 24
877; GFX9-NEXT:    s_lshr_b32 s9, s2, 8
878; GFX9-NEXT:    s_lshr_b32 s10, s2, 16
879; GFX9-NEXT:    s_lshr_b32 s11, s2, 24
880; GFX9-NEXT:    s_and_b32 s12, s2, 7
881; GFX9-NEXT:    s_andn2_b32 s2, 7, s2
882; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
883; GFX9-NEXT:    s_and_b32 s1, s1, s13
884; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
885; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
886; GFX9-NEXT:    s_andn2_b32 s2, 7, s9
887; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
888; GFX9-NEXT:    s_lshr_b32 s1, s1, s12
889; GFX9-NEXT:    s_lshl_b32 s2, s3, s2
890; GFX9-NEXT:    s_and_b32 s3, s6, s13
891; GFX9-NEXT:    s_or_b32 s0, s0, s1
892; GFX9-NEXT:    s_and_b32 s1, s9, 7
893; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
894; GFX9-NEXT:    s_lshr_b32 s1, s3, s1
895; GFX9-NEXT:    s_andn2_b32 s3, 7, s10
896; GFX9-NEXT:    s_lshl_b32 s4, s4, 1
897; GFX9-NEXT:    s_lshl_b32 s3, s4, s3
898; GFX9-NEXT:    s_and_b32 s4, s7, s13
899; GFX9-NEXT:    s_or_b32 s1, s2, s1
900; GFX9-NEXT:    s_and_b32 s2, s10, 7
901; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
902; GFX9-NEXT:    s_lshr_b32 s2, s4, s2
903; GFX9-NEXT:    s_and_b32 s1, s1, s13
904; GFX9-NEXT:    s_or_b32 s2, s3, s2
905; GFX9-NEXT:    s_and_b32 s3, s11, 7
906; GFX9-NEXT:    s_andn2_b32 s4, 7, s11
907; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
908; GFX9-NEXT:    s_and_b32 s0, s0, s13
909; GFX9-NEXT:    s_lshl_b32 s1, s1, 8
910; GFX9-NEXT:    s_lshl_b32 s4, s5, s4
911; GFX9-NEXT:    s_lshr_b32 s3, s8, s3
912; GFX9-NEXT:    s_or_b32 s0, s0, s1
913; GFX9-NEXT:    s_and_b32 s1, s2, s13
914; GFX9-NEXT:    s_or_b32 s3, s4, s3
915; GFX9-NEXT:    s_lshl_b32 s1, s1, 16
916; GFX9-NEXT:    s_or_b32 s0, s0, s1
917; GFX9-NEXT:    s_and_b32 s1, s3, s13
918; GFX9-NEXT:    s_lshl_b32 s1, s1, 24
919; GFX9-NEXT:    s_or_b32 s0, s0, s1
920; GFX9-NEXT:    ; return to shader part epilog
921;
922; GFX10-LABEL: s_fshr_v4i8:
923; GFX10:       ; %bb.0:
924; GFX10-NEXT:    s_lshr_b32 s6, s1, 8
925; GFX10-NEXT:    s_movk_i32 s13, 0xff
926; GFX10-NEXT:    s_lshr_b32 s3, s0, 8
927; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
928; GFX10-NEXT:    s_lshr_b32 s5, s0, 24
929; GFX10-NEXT:    s_lshr_b32 s7, s1, 16
930; GFX10-NEXT:    s_lshr_b32 s8, s1, 24
931; GFX10-NEXT:    s_lshr_b32 s9, s2, 8
932; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
933; GFX10-NEXT:    s_lshr_b32 s11, s2, 24
934; GFX10-NEXT:    s_and_b32 s12, s2, 7
935; GFX10-NEXT:    s_andn2_b32 s2, 7, s2
936; GFX10-NEXT:    s_and_b32 s1, s1, s13
937; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
938; GFX10-NEXT:    s_and_b32 s6, s6, s13
939; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
940; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
941; GFX10-NEXT:    s_and_b32 s2, s9, 7
942; GFX10-NEXT:    s_andn2_b32 s9, 7, s9
943; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
944; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
945; GFX10-NEXT:    s_lshr_b32 s1, s1, s12
946; GFX10-NEXT:    s_lshl_b32 s3, s3, s9
947; GFX10-NEXT:    s_lshr_b32 s2, s6, s2
948; GFX10-NEXT:    s_and_b32 s6, s7, s13
949; GFX10-NEXT:    s_or_b32 s0, s0, s1
950; GFX10-NEXT:    s_or_b32 s1, s3, s2
951; GFX10-NEXT:    s_and_b32 s2, s10, 7
952; GFX10-NEXT:    s_andn2_b32 s3, 7, s10
953; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
954; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
955; GFX10-NEXT:    s_lshl_b32 s3, s4, s3
956; GFX10-NEXT:    s_lshr_b32 s2, s6, s2
957; GFX10-NEXT:    s_andn2_b32 s4, 7, s11
958; GFX10-NEXT:    s_lshl_b32 s5, s5, 1
959; GFX10-NEXT:    s_and_b32 s6, s11, 7
960; GFX10-NEXT:    s_lshl_b32 s4, s5, s4
961; GFX10-NEXT:    s_lshr_b32 s5, s8, s6
962; GFX10-NEXT:    s_or_b32 s2, s3, s2
963; GFX10-NEXT:    s_and_b32 s1, s1, s13
964; GFX10-NEXT:    s_or_b32 s3, s4, s5
965; GFX10-NEXT:    s_and_b32 s0, s0, s13
966; GFX10-NEXT:    s_lshl_b32 s1, s1, 8
967; GFX10-NEXT:    s_and_b32 s2, s2, s13
968; GFX10-NEXT:    s_or_b32 s0, s0, s1
969; GFX10-NEXT:    s_lshl_b32 s1, s2, 16
970; GFX10-NEXT:    s_and_b32 s2, s3, s13
971; GFX10-NEXT:    s_or_b32 s0, s0, s1
972; GFX10-NEXT:    s_lshl_b32 s1, s2, 24
973; GFX10-NEXT:    s_or_b32 s0, s0, s1
974; GFX10-NEXT:    ; return to shader part epilog
; IR under test: view each packed i32 operand as <4 x i8>, funnel-shift right
; lane-wise, then repack the vector result into the i32 return value.
975  %lhs = bitcast i32 %lhs.arg to <4 x i8>
976  %rhs = bitcast i32 %rhs.arg to <4 x i8>
977  %amt = bitcast i32 %amt.arg to <4 x i8>
978  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
979  %cast.result = bitcast <4 x i8> %result to i32
980  ret i32 %cast.result
981}
982
; v_fshr_v4i8 -- funnel-shift-right of four packed i8 lanes with non-inreg
; arguments and no explicit calling convention on the define.
; As in the scalar variant, the i32 operands are bitcast to <4 x i8>, passed to
; llvm.fshr.v4i8, and the result is bitcast back to i32.
; The expected output operates on VGPRs (v0..) and returns through
; s_setpc_b64 s[30:31]. The GFX6 checks use 32-bit shifts with v_bfe_u32 byte
; extraction; the GFX8, GFX9 and GFX10 checks use 16-bit shifts
; (v_lshlrev_b16 / v_lshrrev_b16) together with SDWA operand selects. GFX9 and
; GFX10 additionally merge the lanes with v_and_or_b32 and v_or3_b32.
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
983define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
984; GFX6-LABEL: v_fshr_v4i8:
985; GFX6:       ; %bb.0:
986; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
987; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 8, v2
988; GFX6-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
989; GFX6-NEXT:    v_lshrrev_b32_e32 v9, 24, v2
990; GFX6-NEXT:    v_and_b32_e32 v10, 7, v2
991; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
992; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
993; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
994; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
995; GFX6-NEXT:    v_and_b32_e32 v2, 7, v2
996; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
997; GFX6-NEXT:    v_and_b32_e32 v11, 0xff, v1
998; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
999; GFX6-NEXT:    v_lshrrev_b32_e32 v10, v10, v11
1000; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
1001; GFX6-NEXT:    v_and_b32_e32 v10, 7, v7
1002; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v7
1003; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1004; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
1005; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v7, v3
1006; GFX6-NEXT:    v_bfe_u32 v7, v1, 8, 8
1007; GFX6-NEXT:    v_lshrrev_b32_e32 v7, v10, v7
1008; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
1009; GFX6-NEXT:    v_and_b32_e32 v7, 7, v8
1010; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
1011; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 24, v1
1012; GFX6-NEXT:    v_and_b32_e32 v8, 7, v8
1013; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
1014; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1015; GFX6-NEXT:    v_mov_b32_e32 v2, 0xff
1016; GFX6-NEXT:    v_lshlrev_b32_e32 v4, v8, v4
1017; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v7, v1
1018; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v9
1019; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
1020; GFX6-NEXT:    v_and_b32_e32 v4, 7, v9
1021; GFX6-NEXT:    v_and_b32_e32 v7, 7, v7
1022; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
1023; GFX6-NEXT:    v_and_b32_e32 v3, v3, v2
1024; GFX6-NEXT:    v_lshlrev_b32_e32 v5, v7, v5
1025; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v4, v6
1026; GFX6-NEXT:    v_and_b32_e32 v0, v0, v2
1027; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1028; GFX6-NEXT:    v_and_b32_e32 v1, v1, v2
1029; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
1030; GFX6-NEXT:    v_or_b32_e32 v0, v0, v3
1031; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1032; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1033; GFX6-NEXT:    v_and_b32_e32 v1, v4, v2
1034; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
1035; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1036; GFX6-NEXT:    s_setpc_b64 s[30:31]
1037;
1038; GFX8-LABEL: v_fshr_v4i8:
1039; GFX8:       ; %bb.0:
1040; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1042; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1043; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
1044; GFX8-NEXT:    v_and_b32_e32 v8, 7, v2
1045; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
1046; GFX8-NEXT:    v_and_b32_e32 v2, 7, v2
1047; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v0
1048; GFX8-NEXT:    v_lshlrev_b16_e32 v2, v2, v9
1049; GFX8-NEXT:    v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1050; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1051; GFX8-NEXT:    v_or_b32_e32 v2, v2, v8
1052; GFX8-NEXT:    v_and_b32_e32 v8, 7, v5
1053; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
1054; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1055; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1056; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1057; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v5, v3
1058; GFX8-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1059; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
1060; GFX8-NEXT:    v_and_b32_e32 v4, 7, v6
1061; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v6
1062; GFX8-NEXT:    v_mov_b32_e32 v6, 1
1063; GFX8-NEXT:    v_mov_b32_e32 v9, 0xff
1064; GFX8-NEXT:    v_and_b32_e32 v5, 7, v5
1065; GFX8-NEXT:    v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1066; GFX8-NEXT:    v_lshlrev_b16_e32 v5, v5, v8
1067; GFX8-NEXT:    v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1068; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v8
1069; GFX8-NEXT:    v_or_b32_e32 v4, v5, v4
1070; GFX8-NEXT:    v_and_b32_e32 v5, 7, v7
1071; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v7
1072; GFX8-NEXT:    v_and_b32_e32 v7, 7, v7
1073; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1074; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
1075; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1076; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1077; GFX8-NEXT:    v_mov_b32_e32 v1, 8
1078; GFX8-NEXT:    s_movk_i32 s4, 0xff
1079; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1080; GFX8-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1081; GFX8-NEXT:    v_and_b32_e32 v2, s4, v4
1082; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1083; GFX8-NEXT:    v_and_b32_e32 v0, s4, v0
1084; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
1085; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1086; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1087; GFX8-NEXT:    s_setpc_b64 s[30:31]
1088;
1089; GFX9-LABEL: v_fshr_v4i8:
1090; GFX9:       ; %bb.0:
1091; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v2
1093; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1094; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v2
1095; GFX9-NEXT:    v_and_b32_e32 v8, 7, v2
1096; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
1097; GFX9-NEXT:    v_and_b32_e32 v2, 7, v2
1098; GFX9-NEXT:    v_lshlrev_b16_e32 v9, 1, v0
1099; GFX9-NEXT:    v_lshlrev_b16_e32 v2, v2, v9
1100; GFX9-NEXT:    v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1101; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1102; GFX9-NEXT:    v_or_b32_e32 v2, v2, v8
1103; GFX9-NEXT:    v_and_b32_e32 v8, 7, v5
1104; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v5
1105; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
1106; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1107; GFX9-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
1108; GFX9-NEXT:    v_lshlrev_b16_e32 v3, v5, v3
1109; GFX9-NEXT:    v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1110; GFX9-NEXT:    v_or_b32_e32 v3, v3, v4
1111; GFX9-NEXT:    v_and_b32_e32 v4, 7, v6
1112; GFX9-NEXT:    v_xor_b32_e32 v5, -1, v6
1113; GFX9-NEXT:    v_mov_b32_e32 v6, 1
1114; GFX9-NEXT:    v_mov_b32_e32 v9, 0xff
1115; GFX9-NEXT:    v_and_b32_e32 v5, 7, v5
1116; GFX9-NEXT:    v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1117; GFX9-NEXT:    v_lshlrev_b16_e32 v5, v5, v8
1118; GFX9-NEXT:    v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1119; GFX9-NEXT:    v_lshrrev_b16_e32 v4, v4, v8
1120; GFX9-NEXT:    v_or_b32_e32 v4, v5, v4
1121; GFX9-NEXT:    v_and_b32_e32 v5, 7, v7
1122; GFX9-NEXT:    v_xor_b32_e32 v7, -1, v7
1123; GFX9-NEXT:    v_and_b32_e32 v7, 7, v7
1124; GFX9-NEXT:    v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1125; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v7, v0
1126; GFX9-NEXT:    v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1127; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
1128; GFX9-NEXT:    v_mov_b32_e32 v1, 8
1129; GFX9-NEXT:    s_movk_i32 s4, 0xff
1130; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1131; GFX9-NEXT:    v_and_or_b32 v1, v2, s4, v1
1132; GFX9-NEXT:    v_and_b32_e32 v2, s4, v4
1133; GFX9-NEXT:    v_and_b32_e32 v0, s4, v0
1134; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1135; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
1136; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
1137; GFX9-NEXT:    s_setpc_b64 s[30:31]
1138;
1139; GFX10-LABEL: v_fshr_v4i8:
1140; GFX10:       ; %bb.0:
1141; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1143; GFX10-NEXT:    v_lshrrev_b32_e32 v6, 8, v2
1144; GFX10-NEXT:    v_lshrrev_b32_e32 v3, 8, v0
1145; GFX10-NEXT:    v_xor_b32_e32 v8, -1, v2
1146; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1147; GFX10-NEXT:    v_lshrrev_b32_e32 v12, 24, v2
1148; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v6
1149; GFX10-NEXT:    v_lshlrev_b16 v3, 1, v3
1150; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1151; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 24, v0
1152; GFX10-NEXT:    v_lshrrev_b32_e32 v7, 8, v1
1153; GFX10-NEXT:    v_and_b32_e32 v11, 7, v11
1154; GFX10-NEXT:    v_and_b32_e32 v8, 7, v8
1155; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
1156; GFX10-NEXT:    v_mov_b32_e32 v13, 0xff
1157; GFX10-NEXT:    v_xor_b32_e32 v14, -1, v12
1158; GFX10-NEXT:    v_lshlrev_b16 v3, v11, v3
1159; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v10
1160; GFX10-NEXT:    s_movk_i32 s4, 0xff
1161; GFX10-NEXT:    v_lshrrev_b32_e32 v9, 24, v1
1162; GFX10-NEXT:    v_lshlrev_b16 v0, v8, v0
1163; GFX10-NEXT:    v_and_b32_e32 v8, s4, v1
1164; GFX10-NEXT:    v_and_b32_e32 v6, 7, v6
1165; GFX10-NEXT:    v_and_b32_e32 v7, s4, v7
1166; GFX10-NEXT:    v_and_b32_e32 v10, 7, v10
1167; GFX10-NEXT:    v_and_b32_e32 v11, 7, v11
1168; GFX10-NEXT:    v_lshlrev_b16 v4, 1, v4
1169; GFX10-NEXT:    v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1170; GFX10-NEXT:    v_and_b32_e32 v13, 7, v14
1171; GFX10-NEXT:    v_lshlrev_b16 v5, 1, v5
1172; GFX10-NEXT:    v_and_b32_e32 v12, 7, v12
1173; GFX10-NEXT:    v_and_b32_e32 v2, 7, v2
1174; GFX10-NEXT:    v_lshrrev_b16 v6, v6, v7
1175; GFX10-NEXT:    v_lshlrev_b16 v4, v11, v4
1176; GFX10-NEXT:    v_lshrrev_b16 v1, v10, v1
1177; GFX10-NEXT:    v_lshlrev_b16 v5, v13, v5
1178; GFX10-NEXT:    v_lshrrev_b16 v7, v12, v9
1179; GFX10-NEXT:    v_lshrrev_b16 v2, v2, v8
1180; GFX10-NEXT:    v_or_b32_e32 v3, v3, v6
1181; GFX10-NEXT:    v_mov_b32_e32 v6, 8
1182; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
1183; GFX10-NEXT:    v_or_b32_e32 v4, v5, v7
1184; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
1185; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1186; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
1187; GFX10-NEXT:    v_and_b32_e32 v3, s4, v4
1188; GFX10-NEXT:    v_and_or_b32 v0, v0, s4, v2
1189; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1190; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
1191; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
1192; GFX10-NEXT:    s_setpc_b64 s[30:31]
; IR under test: view each packed i32 operand as <4 x i8>, funnel-shift right
; lane-wise, then repack the vector result into the i32 return value.
1193  %lhs = bitcast i32 %lhs.arg to <4 x i8>
1194  %rhs = bitcast i32 %rhs.arg to <4 x i8>
1195  %amt = bitcast i32 %amt.arg to <4 x i8>
1196  %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1197  %cast.result = bitcast <4 x i8> %result to i32
1198  ret i32 %cast.result
1199}
1200
; s_fshr_i24 -- funnel-shift-right on the non-power-of-two width i24 with all
; operands passed inreg to an amdgpu_ps function.
; In the expected output the shift amount is first masked to 24 bits
; (0xffffff) and then reduced modulo 24 by what appears to be a
; float-reciprocal based unsigned remainder expansion: v_rcp_iflag_f32 of 24,
; a multiply-high estimate, a multiply by 24 and subtract, and two conditional
; subtract-24 corrections. The constant 0xffffffe8 is -24 as a 32-bit value.
; The result is then formed as ((lhs << 1) << (23 - amt_mod_24)) OR'd with the
; 24-bit-masked rhs shifted right by amt_mod_24; GFX9 and GFX10 fuse the last
; shift and OR into v_lshl_or_b32.
; The remainder is computed with vector (v_*) instructions and the final value
; is copied into s0 with v_readfirstlane_b32.
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
1201define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1202; GFX6-LABEL: s_fshr_i24:
1203; GFX6:       ; %bb.0:
1204; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1205; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1206; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1207; GFX6-NEXT:    s_mov_b32 s3, 0xffffff
1208; GFX6-NEXT:    s_and_b32 s2, s2, s3
1209; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1210; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1211; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1212; GFX6-NEXT:    s_and_b32 s1, s1, s3
1213; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v0
1214; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
1215; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1216; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
1217; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1218; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s2, v0
1219; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
1220; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1221; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1222; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, 24, v0
1223; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1224; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1225; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, 23, v0
1226; GFX6-NEXT:    v_and_b32_e32 v0, s3, v0
1227; GFX6-NEXT:    v_and_b32_e32 v1, s3, v1
1228; GFX6-NEXT:    v_lshl_b32_e32 v1, s0, v1
1229; GFX6-NEXT:    v_lshr_b32_e32 v0, s1, v0
1230; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
1231; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1232; GFX6-NEXT:    ; return to shader part epilog
1233;
1234; GFX8-LABEL: s_fshr_i24:
1235; GFX8:       ; %bb.0:
1236; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1237; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1238; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1239; GFX8-NEXT:    s_mov_b32 s3, 0xffffff
1240; GFX8-NEXT:    s_and_b32 s2, s2, s3
1241; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1242; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1243; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1244; GFX8-NEXT:    s_and_b32 s1, s1, s3
1245; GFX8-NEXT:    v_mul_lo_u32 v1, v1, v0
1246; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
1247; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
1248; GFX8-NEXT:    v_mul_hi_u32 v0, s2, v0
1249; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1250; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s2, v0
1251; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
1252; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1253; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1254; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, 24, v0
1255; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1256; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1257; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, 23, v0
1258; GFX8-NEXT:    v_and_b32_e32 v0, s3, v0
1259; GFX8-NEXT:    v_and_b32_e32 v1, s3, v1
1260; GFX8-NEXT:    v_lshlrev_b32_e64 v1, v1, s0
1261; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1262; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
1263; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1264; GFX8-NEXT:    ; return to shader part epilog
1265;
1266; GFX9-LABEL: s_fshr_i24:
1267; GFX9:       ; %bb.0:
1268; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1269; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1270; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1271; GFX9-NEXT:    s_mov_b32 s3, 0xffffff
1272; GFX9-NEXT:    s_and_b32 s2, s2, s3
1273; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1274; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1275; GFX9-NEXT:    s_and_b32 s1, s1, s3
1276; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1277; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v0
1278; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
1279; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
1280; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
1281; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1282; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
1283; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
1284; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1285; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1286; GFX9-NEXT:    v_subrev_u32_e32 v1, 24, v0
1287; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1288; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1289; GFX9-NEXT:    v_sub_u32_e32 v1, 23, v0
1290; GFX9-NEXT:    v_and_b32_e32 v0, s3, v0
1291; GFX9-NEXT:    v_and_b32_e32 v1, s3, v1
1292; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1293; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1294; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1295; GFX9-NEXT:    ; return to shader part epilog
1296;
1297; GFX10-LABEL: s_fshr_i24:
1298; GFX10:       ; %bb.0:
1299; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1300; GFX10-NEXT:    s_mov_b32 s3, 0xffffff
1301; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1302; GFX10-NEXT:    s_and_b32 s2, s2, s3
1303; GFX10-NEXT:    s_and_b32 s1, s1, s3
1304; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1305; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1306; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1307; GFX10-NEXT:    v_mul_lo_u32 v1, 0xffffffe8, v0
1308; GFX10-NEXT:    v_mul_hi_u32 v1, v0, v1
1309; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1310; GFX10-NEXT:    v_mul_hi_u32 v0, s2, v0
1311; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1312; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s2, v0
1313; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1314; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1315; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1316; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, 24, v0
1317; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1318; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1319; GFX10-NEXT:    v_sub_nc_u32_e32 v1, 23, v0
1320; GFX10-NEXT:    v_and_b32_e32 v0, s3, v0
1321; GFX10-NEXT:    v_and_b32_e32 v1, s3, v1
1322; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s1
1323; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v1, v0
1324; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1325; GFX10-NEXT:    ; return to shader part epilog
; IR under test: a single i24 funnel-shift-right whose result is returned directly.
1326  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1327  ret i24 %result
1328}
1329
; v_fshr_i24 -- funnel-shift-right on the non-power-of-two width i24 with
; non-inreg arguments and no explicit calling convention on the define.
; The expected output works entirely in VGPRs and returns through
; s_setpc_b64 s[30:31]. The shift amount is masked to 24 bits (0xffffff) and
; then reduced modulo 24 by what appears to be a float-reciprocal based
; unsigned remainder expansion (v_rcp_iflag_f32 of 24, multiply-high estimate,
; multiply by 24 and subtract, then two conditional subtract-24 corrections).
; The constant 0xffffffe8 is -24 as a 32-bit value.
; The result is ((lhs << 1) << (23 - amt_mod_24)) OR'd with the 24-bit-masked
; rhs shifted right by amt_mod_24; GFX9 and GFX10 fuse the final shift and OR
; into v_lshl_or_b32.
; The check lines are autogenerated (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
1330define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1331; GFX6-LABEL: v_fshr_i24:
1332; GFX6:       ; %bb.0:
1333; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1334; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1335; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1336; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1337; GFX6-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1338; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1339; GFX6-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1340; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v3
1341; GFX6-NEXT:    v_mul_lo_u32 v4, v4, v3
1342; GFX6-NEXT:    v_mul_hi_u32 v4, v3, v4
1343; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
1344; GFX6-NEXT:    v_mul_hi_u32 v3, v2, v3
1345; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffff
1346; GFX6-NEXT:    v_and_b32_e32 v1, v1, v4
1347; GFX6-NEXT:    v_mul_lo_u32 v3, v3, 24
1348; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
1349; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
1350; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1351; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1352; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v2
1353; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1354; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1355; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v2
1356; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
1357; GFX6-NEXT:    v_and_b32_e32 v3, v3, v4
1358; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1359; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1360; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
1361; GFX6-NEXT:    s_setpc_b64 s[30:31]
1362;
1363; GFX8-LABEL: v_fshr_i24:
1364; GFX8:       ; %bb.0:
1365; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1366; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1367; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1368; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1369; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1370; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1371; GFX8-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1372; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
1373; GFX8-NEXT:    v_mul_lo_u32 v4, v4, v3
1374; GFX8-NEXT:    v_mul_hi_u32 v4, v3, v4
1375; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
1376; GFX8-NEXT:    v_mul_hi_u32 v3, v2, v3
1377; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffff
1378; GFX8-NEXT:    v_and_b32_e32 v1, v1, v4
1379; GFX8-NEXT:    v_mul_lo_u32 v3, v3, 24
1380; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v2, v3
1381; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
1382; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1383; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1384; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v2
1385; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1386; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1387; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v2
1388; GFX8-NEXT:    v_and_b32_e32 v2, v2, v4
1389; GFX8-NEXT:    v_and_b32_e32 v3, v3, v4
1390; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v3, v0
1391; GFX8-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1392; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
1393; GFX8-NEXT:    s_setpc_b64 s[30:31]
1394;
1395; GFX9-LABEL: v_fshr_i24:
1396; GFX9:       ; %bb.0:
1397; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1399; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1400; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffffffe8
1401; GFX9-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1402; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1403; GFX9-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1404; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
1405; GFX9-NEXT:    v_mul_lo_u32 v4, v4, v3
1406; GFX9-NEXT:    v_mul_hi_u32 v4, v3, v4
1407; GFX9-NEXT:    v_add_u32_e32 v3, v3, v4
1408; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
1409; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffffff
1410; GFX9-NEXT:    v_and_b32_e32 v1, v1, v4
1411; GFX9-NEXT:    v_mul_lo_u32 v3, v3, 24
1412; GFX9-NEXT:    v_sub_u32_e32 v2, v2, v3
1413; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
1414; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1415; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1416; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v2
1417; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
1418; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1419; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v2
1420; GFX9-NEXT:    v_and_b32_e32 v2, v2, v4
1421; GFX9-NEXT:    v_and_b32_e32 v3, v3, v4
1422; GFX9-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1423; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1424; GFX9-NEXT:    s_setpc_b64 s[30:31]
1425;
1426; GFX10-LABEL: v_fshr_i24:
1427; GFX10:       ; %bb.0:
1428; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1430; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 24
1431; GFX10-NEXT:    v_and_b32_e32 v2, 0xffffff, v2
1432; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1433; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
1434; GFX10-NEXT:    v_mul_f32_e32 v3, 0x4f7ffffe, v3
1435; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
1436; GFX10-NEXT:    v_mul_lo_u32 v4, 0xffffffe8, v3
1437; GFX10-NEXT:    v_mul_hi_u32 v4, v3, v4
1438; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v4
1439; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffffff
1440; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
1441; GFX10-NEXT:    v_and_b32_e32 v1, v1, v4
1442; GFX10-NEXT:    v_mul_lo_u32 v3, v3, 24
1443; GFX10-NEXT:    v_sub_nc_u32_e32 v2, v2, v3
1444; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1445; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1446; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1447; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v2
1448; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v2
1449; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1450; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v2
1451; GFX10-NEXT:    v_and_b32_e32 v2, v2, v4
1452; GFX10-NEXT:    v_and_b32_e32 v3, v3, v4
1453; GFX10-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
1454; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v3, v1
1455; GFX10-NEXT:    s_setpc_b64 s[30:31]
; IR under test: a single i24 funnel-shift-right whose result is returned directly.
1456  %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1457  ret i24 %result
1458}
1459
; Funnel-shift-right test for <2 x i24> with all operands passed as inreg
; arguments to an amdgpu_ps function. Each i48 argument is bitcast to
; <2 x i24>, the three vectors are passed to @llvm.fshr.v2i24, and the result
; is bitcast back to i48 for the return value.
;
; The check lines between the define line and the IR body are autogenerated
; (see the NOTE on the first line of this file, which names
; utils/update_llc_test_checks.py); regenerate them with that script instead
; of editing them by hand.
;
; NOTE(review): the expected code appears to reduce each lane's shift amount
; modulo 24 (reciprocal multiply by 0x4f7ffffe, then two compare-and-subtract
; steps against 24) and to use "23 - amount" for the left-shift part; confirm
; against the legalizer before relying on this description.
1460define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1461; GFX6-LABEL: s_fshr_v2i24:
1462; GFX6:       ; %bb.0:
1463; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1464; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1465; GFX6-NEXT:    s_movk_i32 s9, 0xff
1466; GFX6-NEXT:    s_mov_b32 s11, 0x80008
1467; GFX6-NEXT:    s_lshr_b32 s6, s0, 16
1468; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1469; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
1470; GFX6-NEXT:    s_lshr_b32 s7, s0, 24
1471; GFX6-NEXT:    s_lshr_b32 s8, s1, 8
1472; GFX6-NEXT:    s_and_b32 s10, s0, s9
1473; GFX6-NEXT:    s_bfe_u32 s0, s0, s11
1474; GFX6-NEXT:    s_and_b32 s1, s1, s9
1475; GFX6-NEXT:    s_lshl_b32 s0, s0, 8
1476; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
1477; GFX6-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1478; GFX6-NEXT:    s_or_b32 s0, s10, s0
1479; GFX6-NEXT:    s_or_b32 s1, s7, s1
1480; GFX6-NEXT:    s_and_b32 s7, s8, s9
1481; GFX6-NEXT:    s_lshr_b32 s8, s2, 16
1482; GFX6-NEXT:    s_lshr_b32 s10, s2, 24
1483; GFX6-NEXT:    s_and_b32 s13, s2, s9
1484; GFX6-NEXT:    s_bfe_u32 s2, s2, s11
1485; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v0
1486; GFX6-NEXT:    s_lshl_b32 s2, s2, 8
1487; GFX6-NEXT:    s_and_b32 s8, s8, s9
1488; GFX6-NEXT:    s_or_b32 s2, s13, s2
1489; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1490; GFX6-NEXT:    s_lshr_b32 s12, s3, 8
1491; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
1492; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1493; GFX6-NEXT:    s_and_b32 s3, s3, s9
1494; GFX6-NEXT:    s_or_b32 s2, s2, s8
1495; GFX6-NEXT:    s_lshl_b32 s3, s3, 8
1496; GFX6-NEXT:    s_and_b32 s8, s12, s9
1497; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
1498; GFX6-NEXT:    s_or_b32 s3, s10, s3
1499; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1500; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
1501; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1502; GFX6-NEXT:    s_or_b32 s3, s3, s8
1503; GFX6-NEXT:    s_lshr_b32 s8, s4, 16
1504; GFX6-NEXT:    s_lshr_b32 s10, s4, 24
1505; GFX6-NEXT:    s_and_b32 s13, s4, s9
1506; GFX6-NEXT:    s_bfe_u32 s4, s4, s11
1507; GFX6-NEXT:    s_lshl_b32 s4, s4, 8
1508; GFX6-NEXT:    s_and_b32 s8, s8, s9
1509; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
1510; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1511; GFX6-NEXT:    s_or_b32 s4, s13, s4
1512; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1513; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1514; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
1515; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1516; GFX6-NEXT:    s_or_b32 s4, s4, s8
1517; GFX6-NEXT:    v_mul_hi_u32 v0, s4, v0
1518; GFX6-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1519; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v2
1520; GFX6-NEXT:    s_lshr_b32 s12, s5, 8
1521; GFX6-NEXT:    v_mul_lo_u32 v0, v0, 24
1522; GFX6-NEXT:    s_and_b32 s5, s5, s9
1523; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v2
1524; GFX6-NEXT:    s_lshl_b32 s5, s5, 8
1525; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
1526; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v0
1527; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1528; GFX6-NEXT:    v_mul_hi_u32 v1, v2, v1
1529; GFX6-NEXT:    s_and_b32 s8, s12, s9
1530; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1531; GFX6-NEXT:    s_or_b32 s5, s10, s5
1532; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
1533; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, 24, v0
1534; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
1535; GFX6-NEXT:    s_lshl_b32 s8, s8, 16
1536; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1537; GFX6-NEXT:    s_or_b32 s5, s5, s8
1538; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1539; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
1540; GFX6-NEXT:    v_mul_hi_u32 v1, s5, v1
1541; GFX6-NEXT:    s_and_b32 s6, s6, s9
1542; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
1543; GFX6-NEXT:    s_bfe_u32 s6, s6, 0x100000
1544; GFX6-NEXT:    v_mul_lo_u32 v1, v1, 24
1545; GFX6-NEXT:    s_mov_b32 s8, 0xffffff
1546; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, 23, v0
1547; GFX6-NEXT:    s_lshl_b32 s4, s6, 17
1548; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
1549; GFX6-NEXT:    s_or_b32 s0, s4, s0
1550; GFX6-NEXT:    v_and_b32_e32 v2, s8, v3
1551; GFX6-NEXT:    v_and_b32_e32 v0, s8, v0
1552; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
1553; GFX6-NEXT:    v_lshr_b32_e32 v0, s2, v0
1554; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s5, v1
1555; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1556; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 24, v1
1557; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1558; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1559; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, 24, v1
1560; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1561; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
1562; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
1563; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1564; GFX6-NEXT:    v_mov_b32_e32 v4, 0xffffff
1565; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 23, v1
1566; GFX6-NEXT:    s_lshl_b32 s0, s7, 17
1567; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
1568; GFX6-NEXT:    s_or_b32 s0, s0, s1
1569; GFX6-NEXT:    v_and_b32_e32 v2, v2, v4
1570; GFX6-NEXT:    v_and_b32_e32 v1, v1, v4
1571; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
1572; GFX6-NEXT:    v_lshr_b32_e32 v1, s3, v1
1573; GFX6-NEXT:    v_bfe_u32 v3, v0, 8, 8
1574; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
1575; GFX6-NEXT:    v_and_b32_e32 v2, s9, v0
1576; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
1577; GFX6-NEXT:    v_bfe_u32 v0, v0, 16, 8
1578; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
1579; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1580; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
1581; GFX6-NEXT:    v_and_b32_e32 v2, s9, v1
1582; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v2
1583; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
1584; GFX6-NEXT:    v_bfe_u32 v2, v1, 8, 8
1585; GFX6-NEXT:    v_bfe_u32 v1, v1, 16, 8
1586; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1587; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
1588; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
1589; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
1590; GFX6-NEXT:    ; return to shader part epilog
1591;
1592; GFX8-LABEL: s_fshr_v2i24:
1593; GFX8:       ; %bb.0:
1594; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1595; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1596; GFX8-NEXT:    s_movk_i32 s10, 0xff
1597; GFX8-NEXT:    s_lshr_b32 s9, s1, 8
1598; GFX8-NEXT:    s_bfe_u32 s11, 8, 0x100000
1599; GFX8-NEXT:    s_and_b32 s1, s1, s10
1600; GFX8-NEXT:    s_lshr_b32 s6, s0, 8
1601; GFX8-NEXT:    s_lshr_b32 s8, s0, 24
1602; GFX8-NEXT:    s_lshl_b32 s1, s1, s11
1603; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1604; GFX8-NEXT:    s_and_b32 s6, s6, s10
1605; GFX8-NEXT:    s_or_b32 s1, s8, s1
1606; GFX8-NEXT:    s_lshr_b32 s8, s2, 8
1607; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
1608; GFX8-NEXT:    s_lshr_b32 s7, s0, 16
1609; GFX8-NEXT:    s_and_b32 s0, s0, s10
1610; GFX8-NEXT:    s_lshl_b32 s6, s6, s11
1611; GFX8-NEXT:    s_and_b32 s8, s8, s10
1612; GFX8-NEXT:    s_or_b32 s0, s0, s6
1613; GFX8-NEXT:    s_and_b32 s6, s7, s10
1614; GFX8-NEXT:    s_and_b32 s7, s9, s10
1615; GFX8-NEXT:    s_lshr_b32 s9, s2, 16
1616; GFX8-NEXT:    s_lshr_b32 s12, s2, 24
1617; GFX8-NEXT:    s_and_b32 s2, s2, s10
1618; GFX8-NEXT:    s_lshl_b32 s8, s8, s11
1619; GFX8-NEXT:    s_or_b32 s2, s2, s8
1620; GFX8-NEXT:    s_and_b32 s8, s9, s10
1621; GFX8-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1622; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1623; GFX8-NEXT:    v_mul_lo_u32 v2, v1, v0
1624; GFX8-NEXT:    s_lshr_b32 s13, s3, 8
1625; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
1626; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1627; GFX8-NEXT:    s_and_b32 s3, s3, s10
1628; GFX8-NEXT:    s_or_b32 s2, s2, s8
1629; GFX8-NEXT:    s_lshl_b32 s3, s3, s11
1630; GFX8-NEXT:    s_and_b32 s8, s13, s10
1631; GFX8-NEXT:    s_or_b32 s3, s12, s3
1632; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1633; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
1634; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1635; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
1636; GFX8-NEXT:    s_or_b32 s3, s3, s8
1637; GFX8-NEXT:    s_lshr_b32 s8, s4, 8
1638; GFX8-NEXT:    s_and_b32 s8, s8, s10
1639; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
1640; GFX8-NEXT:    s_lshr_b32 s12, s4, 24
1641; GFX8-NEXT:    s_and_b32 s4, s4, s10
1642; GFX8-NEXT:    s_lshl_b32 s8, s8, s11
1643; GFX8-NEXT:    s_or_b32 s4, s4, s8
1644; GFX8-NEXT:    s_and_b32 s8, s9, s10
1645; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
1646; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1647; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1648; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1649; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
1650; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1651; GFX8-NEXT:    s_or_b32 s4, s4, s8
1652; GFX8-NEXT:    v_mul_hi_u32 v0, s4, v0
1653; GFX8-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1654; GFX8-NEXT:    v_cvt_u32_f32_e32 v2, v2
1655; GFX8-NEXT:    s_lshr_b32 s13, s5, 8
1656; GFX8-NEXT:    v_mul_lo_u32 v0, v0, 24
1657; GFX8-NEXT:    s_and_b32 s5, s5, s10
1658; GFX8-NEXT:    v_mul_lo_u32 v1, v1, v2
1659; GFX8-NEXT:    s_lshl_b32 s5, s5, s11
1660; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s4, v0
1661; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v0
1662; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1663; GFX8-NEXT:    v_mul_hi_u32 v1, v2, v1
1664; GFX8-NEXT:    s_and_b32 s8, s13, s10
1665; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1666; GFX8-NEXT:    s_or_b32 s5, s12, s5
1667; GFX8-NEXT:    s_bfe_u32 s8, s8, 0x100000
1668; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, 24, v0
1669; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
1670; GFX8-NEXT:    s_lshl_b32 s8, s8, 16
1671; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1672; GFX8-NEXT:    s_or_b32 s5, s5, s8
1673; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1674; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
1675; GFX8-NEXT:    v_mul_hi_u32 v1, s5, v1
1676; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
1677; GFX8-NEXT:    s_bfe_u32 s6, s6, 0x100000
1678; GFX8-NEXT:    s_mov_b32 s8, 0xffffff
1679; GFX8-NEXT:    v_mul_lo_u32 v1, v1, 24
1680; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, 23, v0
1681; GFX8-NEXT:    s_lshl_b32 s4, s6, 17
1682; GFX8-NEXT:    s_lshl_b32 s0, s0, 1
1683; GFX8-NEXT:    s_or_b32 s0, s4, s0
1684; GFX8-NEXT:    v_and_b32_e32 v2, s8, v3
1685; GFX8-NEXT:    v_and_b32_e32 v0, s8, v0
1686; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
1687; GFX8-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
1688; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s5, v1
1689; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
1690; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
1691; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1692; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1693; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, 24, v1
1694; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1695; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
1696; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
1697; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1698; GFX8-NEXT:    v_mov_b32_e32 v4, 0xffffff
1699; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 23, v1
1700; GFX8-NEXT:    s_lshl_b32 s0, s7, 17
1701; GFX8-NEXT:    s_lshl_b32 s1, s1, 1
1702; GFX8-NEXT:    s_or_b32 s0, s0, s1
1703; GFX8-NEXT:    v_and_b32_e32 v2, v2, v4
1704; GFX8-NEXT:    v_and_b32_e32 v1, v1, v4
1705; GFX8-NEXT:    v_lshlrev_b32_e64 v2, v2, s0
1706; GFX8-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
1707; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
1708; GFX8-NEXT:    v_mov_b32_e32 v2, 8
1709; GFX8-NEXT:    v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1710; GFX8-NEXT:    v_mov_b32_e32 v4, 16
1711; GFX8-NEXT:    v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1712; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1713; GFX8-NEXT:    v_or_b32_e32 v0, v3, v0
1714; GFX8-NEXT:    v_and_b32_e32 v3, s10, v1
1715; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1716; GFX8-NEXT:    v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1717; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
1718; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
1719; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
1720; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
1721; GFX8-NEXT:    ; return to shader part epilog
1722;
1723; GFX9-LABEL: s_fshr_v2i24:
1724; GFX9:       ; %bb.0:
1725; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1726; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1727; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffffffe8
1728; GFX9-NEXT:    s_movk_i32 s12, 0xff
1729; GFX9-NEXT:    s_lshr_b32 s11, s1, 8
1730; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1731; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
1732; GFX9-NEXT:    s_bfe_u32 s13, 8, 0x100000
1733; GFX9-NEXT:    s_and_b32 s1, s1, s12
1734; GFX9-NEXT:    s_lshr_b32 s7, s0, 8
1735; GFX9-NEXT:    v_mul_lo_u32 v2, v1, v0
1736; GFX9-NEXT:    s_lshr_b32 s10, s0, 24
1737; GFX9-NEXT:    s_lshl_b32 s1, s1, s13
1738; GFX9-NEXT:    s_and_b32 s7, s7, s12
1739; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
1740; GFX9-NEXT:    s_or_b32 s1, s10, s1
1741; GFX9-NEXT:    s_lshr_b32 s10, s2, 8
1742; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
1743; GFX9-NEXT:    s_and_b32 s0, s0, s12
1744; GFX9-NEXT:    s_lshl_b32 s7, s7, s13
1745; GFX9-NEXT:    s_and_b32 s10, s10, s12
1746; GFX9-NEXT:    s_or_b32 s0, s0, s7
1747; GFX9-NEXT:    s_and_b32 s7, s9, s12
1748; GFX9-NEXT:    s_and_b32 s9, s11, s12
1749; GFX9-NEXT:    s_lshr_b32 s11, s2, 16
1750; GFX9-NEXT:    s_lshr_b32 s14, s2, 24
1751; GFX9-NEXT:    s_and_b32 s2, s2, s12
1752; GFX9-NEXT:    s_lshl_b32 s10, s10, s13
1753; GFX9-NEXT:    s_or_b32 s2, s2, s10
1754; GFX9-NEXT:    s_and_b32 s10, s11, s12
1755; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
1756; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v2, 24
1757; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1758; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
1759; GFX9-NEXT:    s_lshr_b32 s15, s3, 8
1760; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
1761; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1762; GFX9-NEXT:    s_and_b32 s3, s3, s12
1763; GFX9-NEXT:    s_or_b32 s2, s2, s10
1764; GFX9-NEXT:    s_lshl_b32 s3, s3, s13
1765; GFX9-NEXT:    s_and_b32 s10, s15, s12
1766; GFX9-NEXT:    s_or_b32 s3, s14, s3
1767; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1768; GFX9-NEXT:    s_bfe_u32 s3, s3, 0x100000
1769; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1770; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
1771; GFX9-NEXT:    s_or_b32 s3, s3, s10
1772; GFX9-NEXT:    s_lshr_b32 s10, s4, 8
1773; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
1774; GFX9-NEXT:    s_and_b32 s10, s10, s12
1775; GFX9-NEXT:    s_lshr_b32 s11, s4, 16
1776; GFX9-NEXT:    s_lshr_b32 s14, s4, 24
1777; GFX9-NEXT:    s_and_b32 s4, s4, s12
1778; GFX9-NEXT:    s_lshl_b32 s10, s10, s13
1779; GFX9-NEXT:    s_or_b32 s4, s4, s10
1780; GFX9-NEXT:    s_and_b32 s10, s11, s12
1781; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1782; GFX9-NEXT:    v_mul_lo_u32 v1, v1, v2
1783; GFX9-NEXT:    s_bfe_u32 s4, s4, 0x100000
1784; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1785; GFX9-NEXT:    s_or_b32 s4, s4, s10
1786; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
1787; GFX9-NEXT:    s_lshr_b32 s15, s5, 8
1788; GFX9-NEXT:    s_and_b32 s5, s5, s12
1789; GFX9-NEXT:    v_mul_hi_u32 v1, v2, v1
1790; GFX9-NEXT:    s_lshl_b32 s5, s5, s13
1791; GFX9-NEXT:    s_and_b32 s10, s15, s12
1792; GFX9-NEXT:    s_or_b32 s5, s14, s5
1793; GFX9-NEXT:    s_bfe_u32 s10, s10, 0x100000
1794; GFX9-NEXT:    s_bfe_u32 s5, s5, 0x100000
1795; GFX9-NEXT:    s_lshl_b32 s10, s10, 16
1796; GFX9-NEXT:    v_mul_lo_u32 v0, v0, 24
1797; GFX9-NEXT:    s_or_b32 s5, s5, s10
1798; GFX9-NEXT:    v_add_u32_e32 v1, v2, v1
1799; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
1800; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
1801; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v0
1802; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1803; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1804; GFX9-NEXT:    v_mul_lo_u32 v1, v1, 24
1805; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v0
1806; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v0
1807; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
1808; GFX9-NEXT:    s_bfe_u32 s7, s7, 0x100000
1809; GFX9-NEXT:    s_mov_b32 s10, 0xffffff
1810; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1811; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v0
1812; GFX9-NEXT:    s_lshl_b32 s4, s7, 17
1813; GFX9-NEXT:    s_lshl_b32 s0, s0, 1
1814; GFX9-NEXT:    v_and_b32_e32 v0, s10, v0
1815; GFX9-NEXT:    s_or_b32 s0, s4, s0
1816; GFX9-NEXT:    v_and_b32_e32 v3, s10, v3
1817; GFX9-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
1818; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
1819; GFX9-NEXT:    v_lshl_or_b32 v0, s0, v3, v0
1820; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v1
1821; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1822; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1823; GFX9-NEXT:    v_subrev_u32_e32 v3, 24, v1
1824; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v1
1825; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
1826; GFX9-NEXT:    s_bfe_u32 s9, s9, 0x100000
1827; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffffff
1828; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
1829; GFX9-NEXT:    v_sub_u32_e32 v3, 23, v1
1830; GFX9-NEXT:    s_lshl_b32 s0, s9, 17
1831; GFX9-NEXT:    s_lshl_b32 s1, s1, 1
1832; GFX9-NEXT:    v_and_b32_e32 v1, v1, v2
1833; GFX9-NEXT:    s_or_b32 s0, s0, s1
1834; GFX9-NEXT:    v_and_b32_e32 v3, v3, v2
1835; GFX9-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
1836; GFX9-NEXT:    s_mov_b32 s6, 8
1837; GFX9-NEXT:    v_lshl_or_b32 v1, s0, v3, v1
1838; GFX9-NEXT:    s_mov_b32 s8, 16
1839; GFX9-NEXT:    v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1840; GFX9-NEXT:    v_and_b32_e32 v3, s12, v1
1841; GFX9-NEXT:    v_and_or_b32 v2, v0, s12, v2
1842; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1843; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1844; GFX9-NEXT:    v_or3_b32 v0, v2, v0, v3
1845; GFX9-NEXT:    v_bfe_u32 v2, v1, 8, 8
1846; GFX9-NEXT:    v_bfe_u32 v1, v1, 16, 8
1847; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 8, v2
1848; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
1849; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
1850; GFX9-NEXT:    ; return to shader part epilog
1851;
1852; GFX10-LABEL: s_fshr_v2i24:
1853; GFX10:       ; %bb.0:
1854; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, 24
1855; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v1, 24
1856; GFX10-NEXT:    s_movk_i32 s9, 0xff
1857; GFX10-NEXT:    s_lshr_b32 s12, s4, 8
1858; GFX10-NEXT:    s_bfe_u32 s10, 8, 0x100000
1859; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
1860; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
1861; GFX10-NEXT:    s_lshr_b32 s13, s4, 16
1862; GFX10-NEXT:    s_and_b32 s12, s12, s9
1863; GFX10-NEXT:    s_lshr_b32 s14, s4, 24
1864; GFX10-NEXT:    s_and_b32 s4, s4, s9
1865; GFX10-NEXT:    s_lshl_b32 s12, s12, s10
1866; GFX10-NEXT:    s_and_b32 s13, s13, s9
1867; GFX10-NEXT:    s_or_b32 s4, s4, s12
1868; GFX10-NEXT:    s_bfe_u32 s12, s13, 0x100000
1869; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
1870; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
1871; GFX10-NEXT:    s_lshr_b32 s15, s5, 8
1872; GFX10-NEXT:    s_bfe_u32 s4, s4, 0x100000
1873; GFX10-NEXT:    s_lshl_b32 s12, s12, 16
1874; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
1875; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
1876; GFX10-NEXT:    s_and_b32 s5, s5, s9
1877; GFX10-NEXT:    s_or_b32 s4, s4, s12
1878; GFX10-NEXT:    s_lshl_b32 s5, s5, s10
1879; GFX10-NEXT:    v_mul_lo_u32 v2, 0xffffffe8, v0
1880; GFX10-NEXT:    v_mul_lo_u32 v3, 0xffffffe8, v1
1881; GFX10-NEXT:    s_and_b32 s12, s15, s9
1882; GFX10-NEXT:    s_or_b32 s5, s14, s5
1883; GFX10-NEXT:    s_bfe_u32 s12, s12, 0x100000
1884; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
1885; GFX10-NEXT:    s_lshl_b32 s12, s12, 16
1886; GFX10-NEXT:    s_lshr_b32 s11, s1, 8
1887; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
1888; GFX10-NEXT:    s_or_b32 s5, s5, s12
1889; GFX10-NEXT:    s_and_b32 s1, s1, s9
1890; GFX10-NEXT:    s_lshr_b32 s6, s0, 8
1891; GFX10-NEXT:    s_lshr_b32 s8, s0, 24
1892; GFX10-NEXT:    s_lshl_b32 s1, s1, s10
1893; GFX10-NEXT:    s_and_b32 s6, s6, s9
1894; GFX10-NEXT:    s_or_b32 s1, s8, s1
1895; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v2
1896; GFX10-NEXT:    v_mul_hi_u32 v2, v1, v3
1897; GFX10-NEXT:    s_lshr_b32 s8, s2, 8
1898; GFX10-NEXT:    s_lshr_b32 s7, s0, 16
1899; GFX10-NEXT:    s_and_b32 s0, s0, s9
1900; GFX10-NEXT:    v_mul_hi_u32 v0, s4, v0
1901; GFX10-NEXT:    s_lshl_b32 s6, s6, s10
1902; GFX10-NEXT:    s_and_b32 s8, s8, s9
1903; GFX10-NEXT:    s_or_b32 s0, s0, s6
1904; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v2
1905; GFX10-NEXT:    s_and_b32 s6, s7, s9
1906; GFX10-NEXT:    s_and_b32 s7, s11, s9
1907; GFX10-NEXT:    s_lshr_b32 s11, s2, 16
1908; GFX10-NEXT:    v_mul_lo_u32 v0, v0, 24
1909; GFX10-NEXT:    v_mul_hi_u32 v1, s5, v1
1910; GFX10-NEXT:    s_lshr_b32 s13, s2, 24
1911; GFX10-NEXT:    s_and_b32 s2, s2, s9
1912; GFX10-NEXT:    s_lshl_b32 s8, s8, s10
1913; GFX10-NEXT:    s_lshr_b32 s12, s3, 8
1914; GFX10-NEXT:    s_or_b32 s2, s2, s8
1915; GFX10-NEXT:    s_and_b32 s8, s11, s9
1916; GFX10-NEXT:    v_sub_nc_u32_e32 v0, s4, v0
1917; GFX10-NEXT:    v_mul_lo_u32 v1, v1, 24
1918; GFX10-NEXT:    s_bfe_u32 s4, s8, 0x100000
1919; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
1920; GFX10-NEXT:    s_lshl_b32 s4, s4, 16
1921; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v0
1922; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1923; GFX10-NEXT:    s_and_b32 s3, s3, s9
1924; GFX10-NEXT:    s_or_b32 s2, s2, s4
1925; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s5, v1
1926; GFX10-NEXT:    s_mov_b32 s4, 0xffffff
1927; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1928; GFX10-NEXT:    s_lshl_b32 s3, s3, s10
1929; GFX10-NEXT:    s_and_b32 s5, s12, s9
1930; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
1931; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
1932; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 24, v0
1933; GFX10-NEXT:    s_or_b32 s3, s13, s3
1934; GFX10-NEXT:    s_bfe_u32 s5, s5, 0x100000
1935; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
1936; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
1937; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v0
1938; GFX10-NEXT:    s_lshl_b32 s5, s5, 16
1939; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
1940; GFX10-NEXT:    s_bfe_u32 s6, s6, 0x100000
1941; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, 24, v1
1942; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
1943; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v1
1944; GFX10-NEXT:    s_or_b32 s3, s3, s5
1945; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
1946; GFX10-NEXT:    s_bfe_u32 s7, s7, 0x100000
1947; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 23, v0
1948; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
1949; GFX10-NEXT:    v_mov_b32_e32 v2, 0xffffff
1950; GFX10-NEXT:    v_and_b32_e32 v0, s4, v0
1951; GFX10-NEXT:    s_lshl_b32 s5, s6, 17
1952; GFX10-NEXT:    v_and_b32_e32 v3, s4, v3
1953; GFX10-NEXT:    v_sub_nc_u32_e32 v4, 23, v1
1954; GFX10-NEXT:    v_and_b32_e32 v1, v1, v2
1955; GFX10-NEXT:    v_lshrrev_b32_e64 v0, v0, s2
1956; GFX10-NEXT:    s_lshl_b32 s0, s0, 1
1957; GFX10-NEXT:    s_lshl_b32 s2, s7, 17
1958; GFX10-NEXT:    v_and_b32_e32 v2, v4, v2
1959; GFX10-NEXT:    v_lshrrev_b32_e64 v1, v1, s3
1960; GFX10-NEXT:    s_or_b32 s0, s5, s0
1961; GFX10-NEXT:    s_lshl_b32 s1, s1, 1
1962; GFX10-NEXT:    v_lshl_or_b32 v0, s0, v3, v0
1963; GFX10-NEXT:    s_or_b32 s0, s2, s1
1964; GFX10-NEXT:    v_lshl_or_b32 v1, s0, v2, v1
1965; GFX10-NEXT:    s_mov_b32 s0, 8
1966; GFX10-NEXT:    v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1967; GFX10-NEXT:    s_mov_b32 s0, 16
1968; GFX10-NEXT:    v_and_b32_e32 v3, s9, v1
1969; GFX10-NEXT:    v_bfe_u32 v4, v1, 8, 8
1970; GFX10-NEXT:    v_bfe_u32 v1, v1, 16, 8
1971; GFX10-NEXT:    v_and_or_b32 v2, v0, s9, v2
1972; GFX10-NEXT:    v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1973; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1974; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 8, v4
1975; GFX10-NEXT:    v_or3_b32 v0, v2, v0, v3
1976; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
1977; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
1978; GFX10-NEXT:    ; return to shader part epilog
; Reinterpret each i48 argument as a <2 x i24> vector.
1979  %lhs = bitcast i48 %lhs.arg to <2 x i24>
1980  %rhs = bitcast i48 %rhs.arg to <2 x i24>
1981  %amt = bitcast i48 %amt.arg to <2 x i24>
; Per-lane funnel shift right of (%lhs, %rhs) by %amt.
1982  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
; Convert the <2 x i24> result back to i48 for the return.
1983  %cast.result = bitcast <2 x i24> %result to i48
1984  ret i48 %cast.result
1985}
1986
1987define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
1988; GFX6-LABEL: v_fshr_v2i24:
1989; GFX6:       ; %bb.0:
1990; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1991; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
1992; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v6
1993; GFX6-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
1994; GFX6-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
1995; GFX6-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
1996; GFX6-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
1997; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v6
1998; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
1999; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2000; GFX6-NEXT:    v_mul_lo_u32 v8, v7, v6
2001; GFX6-NEXT:    v_mul_hi_u32 v8, v6, v8
2002; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
2003; GFX6-NEXT:    v_mul_hi_u32 v6, v4, v6
2004; GFX6-NEXT:    v_rcp_iflag_f32_e32 v8, v9
2005; GFX6-NEXT:    v_mov_b32_e32 v9, 0xffffff
2006; GFX6-NEXT:    v_and_b32_e32 v5, v5, v9
2007; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2008; GFX6-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
2009; GFX6-NEXT:    v_cvt_u32_f32_e32 v8, v8
2010; GFX6-NEXT:    v_and_b32_e32 v2, v2, v9
2011; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
2012; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 24, v4
2013; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2014; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2015; GFX6-NEXT:    v_subrev_i32_e32 v6, vcc, 24, v4
2016; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2017; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2018; GFX6-NEXT:    v_mul_lo_u32 v6, v7, v8
2019; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, 23, v4
2020; GFX6-NEXT:    v_and_b32_e32 v7, v7, v9
2021; GFX6-NEXT:    v_mul_hi_u32 v6, v8, v6
2022; GFX6-NEXT:    v_and_b32_e32 v4, v4, v9
2023; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
2024; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2025; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v8, v6
2026; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
2027; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
2028; GFX6-NEXT:    v_and_b32_e32 v3, v3, v9
2029; GFX6-NEXT:    v_mul_lo_u32 v6, v6, 24
2030; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v5, v6
2031; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
2032; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2033; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2034; GFX6-NEXT:    v_subrev_i32_e32 v4, vcc, 24, v2
2035; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2036; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2037; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 23, v2
2038; GFX6-NEXT:    v_and_b32_e32 v4, v4, v9
2039; GFX6-NEXT:    v_and_b32_e32 v2, v2, v9
2040; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2041; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2042; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
2043; GFX6-NEXT:    s_setpc_b64 s[30:31]
2044;
2045; GFX8-LABEL: v_fshr_v2i24:
2046; GFX8:       ; %bb.0:
2047; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2048; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2049; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2050; GFX8-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2051; GFX8-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2052; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2053; GFX8-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2054; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
2055; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2056; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2057; GFX8-NEXT:    v_mul_lo_u32 v8, v7, v6
2058; GFX8-NEXT:    v_mul_hi_u32 v8, v6, v8
2059; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
2060; GFX8-NEXT:    v_mul_hi_u32 v6, v4, v6
2061; GFX8-NEXT:    v_rcp_iflag_f32_e32 v8, v9
2062; GFX8-NEXT:    v_mov_b32_e32 v9, 0xffffff
2063; GFX8-NEXT:    v_and_b32_e32 v5, v5, v9
2064; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2065; GFX8-NEXT:    v_mul_f32_e32 v8, 0x4f7ffffe, v8
2066; GFX8-NEXT:    v_cvt_u32_f32_e32 v8, v8
2067; GFX8-NEXT:    v_and_b32_e32 v2, v2, v9
2068; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v4, v6
2069; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 24, v4
2070; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2071; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2072; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, 24, v4
2073; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2074; GFX8-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2075; GFX8-NEXT:    v_mul_lo_u32 v6, v7, v8
2076; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, 23, v4
2077; GFX8-NEXT:    v_and_b32_e32 v7, v7, v9
2078; GFX8-NEXT:    v_mul_hi_u32 v6, v8, v6
2079; GFX8-NEXT:    v_and_b32_e32 v4, v4, v9
2080; GFX8-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
2081; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2082; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v8, v6
2083; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
2084; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
2085; GFX8-NEXT:    v_and_b32_e32 v3, v3, v9
2086; GFX8-NEXT:    v_mul_lo_u32 v6, v6, 24
2087; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v5, v6
2088; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
2089; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2090; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2091; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, 24, v2
2092; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2093; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2094; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 23, v2
2095; GFX8-NEXT:    v_and_b32_e32 v4, v4, v9
2096; GFX8-NEXT:    v_and_b32_e32 v2, v2, v9
2097; GFX8-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
2098; GFX8-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2099; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
2100; GFX8-NEXT:    s_setpc_b64 s[30:31]
2101;
2102; GFX9-LABEL: v_fshr_v2i24:
2103; GFX9:       ; %bb.0:
2104; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2105; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2106; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2107; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffffffe8
2108; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v9, 24
2109; GFX9-NEXT:    v_rcp_iflag_f32_e32 v9, v9
2110; GFX9-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2111; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v6
2112; GFX9-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2113; GFX9-NEXT:    v_mul_f32_e32 v9, 0x4f7ffffe, v9
2114; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2115; GFX9-NEXT:    v_mul_lo_u32 v8, v7, v6
2116; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2117; GFX9-NEXT:    v_mul_hi_u32 v8, v6, v8
2118; GFX9-NEXT:    v_add_u32_e32 v6, v6, v8
2119; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v9
2120; GFX9-NEXT:    v_mul_hi_u32 v6, v4, v6
2121; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffffff
2122; GFX9-NEXT:    v_and_b32_e32 v5, v5, v9
2123; GFX9-NEXT:    v_mul_lo_u32 v7, v7, v8
2124; GFX9-NEXT:    v_mul_lo_u32 v6, v6, 24
2125; GFX9-NEXT:    v_and_b32_e32 v2, v2, v9
2126; GFX9-NEXT:    v_and_b32_e32 v3, v3, v9
2127; GFX9-NEXT:    v_mul_hi_u32 v7, v8, v7
2128; GFX9-NEXT:    v_sub_u32_e32 v4, v4, v6
2129; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
2130; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2131; GFX9-NEXT:    v_add_u32_e32 v7, v8, v7
2132; GFX9-NEXT:    v_mul_hi_u32 v7, v5, v7
2133; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2134; GFX9-NEXT:    v_subrev_u32_e32 v6, 24, v4
2135; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v4
2136; GFX9-NEXT:    v_mul_lo_u32 v7, v7, 24
2137; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
2138; GFX9-NEXT:    v_sub_u32_e32 v6, 23, v4
2139; GFX9-NEXT:    v_and_b32_e32 v4, v4, v9
2140; GFX9-NEXT:    v_and_b32_e32 v6, v6, v9
2141; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2142; GFX9-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2143; GFX9-NEXT:    v_sub_u32_e32 v2, v5, v7
2144; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
2145; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2146; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2147; GFX9-NEXT:    v_subrev_u32_e32 v4, 24, v2
2148; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 24, v2
2149; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
2150; GFX9-NEXT:    v_sub_u32_e32 v4, 23, v2
2151; GFX9-NEXT:    v_and_b32_e32 v2, v2, v9
2152; GFX9-NEXT:    v_and_b32_e32 v4, v4, v9
2153; GFX9-NEXT:    v_lshrrev_b32_e32 v2, v2, v3
2154; GFX9-NEXT:    v_lshl_or_b32 v1, v1, v4, v2
2155; GFX9-NEXT:    s_setpc_b64 s[30:31]
2156;
2157; GFX10-LABEL: v_fshr_v2i24:
2158; GFX10:       ; %bb.0:
2159; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2161; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v6, 24
2162; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v7, 24
2163; GFX10-NEXT:    v_mov_b32_e32 v10, 0xffffff
2164; GFX10-NEXT:    v_and_b32_e32 v4, 0xffffff, v4
2165; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2166; GFX10-NEXT:    v_rcp_iflag_f32_e32 v6, v6
2167; GFX10-NEXT:    v_rcp_iflag_f32_e32 v7, v7
2168; GFX10-NEXT:    v_and_b32_e32 v5, v5, v10
2169; GFX10-NEXT:    v_and_b32_e32 v2, v2, v10
2170; GFX10-NEXT:    v_and_b32_e32 v3, v3, v10
2171; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
2172; GFX10-NEXT:    v_mul_f32_e32 v6, 0x4f7ffffe, v6
2173; GFX10-NEXT:    v_mul_f32_e32 v7, 0x4f7ffffe, v7
2174; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v6
2175; GFX10-NEXT:    v_cvt_u32_f32_e32 v7, v7
2176; GFX10-NEXT:    v_mul_lo_u32 v8, 0xffffffe8, v6
2177; GFX10-NEXT:    v_mul_lo_u32 v9, 0xffffffe8, v7
2178; GFX10-NEXT:    v_mul_hi_u32 v8, v6, v8
2179; GFX10-NEXT:    v_mul_hi_u32 v9, v7, v9
2180; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v8
2181; GFX10-NEXT:    v_add_nc_u32_e32 v7, v7, v9
2182; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v6
2183; GFX10-NEXT:    v_mul_hi_u32 v7, v5, v7
2184; GFX10-NEXT:    v_mul_lo_u32 v6, v6, 24
2185; GFX10-NEXT:    v_mul_lo_u32 v7, v7, 24
2186; GFX10-NEXT:    v_sub_nc_u32_e32 v4, v4, v6
2187; GFX10-NEXT:    v_sub_nc_u32_e32 v5, v5, v7
2188; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2189; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2190; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2191; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2192; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2193; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, 24, v4
2194; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2195; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v4
2196; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, 24, v5
2197; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2198; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 24, v5
2199; GFX10-NEXT:    v_sub_nc_u32_e32 v6, 23, v4
2200; GFX10-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2201; GFX10-NEXT:    v_and_b32_e32 v4, v4, v10
2202; GFX10-NEXT:    v_and_b32_e32 v6, v6, v10
2203; GFX10-NEXT:    v_sub_nc_u32_e32 v7, 23, v5
2204; GFX10-NEXT:    v_and_b32_e32 v5, v5, v10
2205; GFX10-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
2206; GFX10-NEXT:    v_and_b32_e32 v4, v7, v10
2207; GFX10-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
2208; GFX10-NEXT:    v_lshl_or_b32 v0, v0, v6, v2
2209; GFX10-NEXT:    v_lshl_or_b32 v1, v1, v4, v3
2210; GFX10-NEXT:    s_setpc_b64 s[30:31]
2211  %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2212  ret <2 x i24> %result
2213}
2214
; Funnel-shift-right of i32 in an amdgpu_ps function with lhs, rhs and amt all
; marked inreg; the expected code reads them from s0, s1 and s2.
; Every target expects v_alignbit_b32 followed by v_readfirstlane_b32 to return
; the result in s0. gfx1010 passes s1 to v_alignbit_b32 directly, while tahiti,
; fiji and gfx900 copy both s1 and s2 into VGPRs first.
2215define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2216; GFX6-LABEL: s_fshr_i32:
2217; GFX6:       ; %bb.0:
2218; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2219; GFX6-NEXT:    v_mov_b32_e32 v1, s2
2220; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2221; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2222; GFX6-NEXT:    ; return to shader part epilog
2223;
2224; GFX8-LABEL: s_fshr_i32:
2225; GFX8:       ; %bb.0:
2226; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2227; GFX8-NEXT:    v_mov_b32_e32 v1, s2
2228; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2229; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2230; GFX8-NEXT:    ; return to shader part epilog
2231;
2232; GFX9-LABEL: s_fshr_i32:
2233; GFX9:       ; %bb.0:
2234; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2235; GFX9-NEXT:    v_mov_b32_e32 v1, s2
2236; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2237; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2238; GFX9-NEXT:    ; return to shader part epilog
2239;
2240; GFX10-LABEL: s_fshr_i32:
2241; GFX10:       ; %bb.0:
2242; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2243; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2244; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2245; GFX10-NEXT:    ; return to shader part epilog
2246  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2247  ret i32 %result
2248}
2249
; Same as s_fshr_i32 but with the shift amount fixed to the constant 5.
; The expected v_alignbit_b32 takes 5 as an inline immediate; gfx1010 also uses
; s1 directly, while tahiti, fiji and gfx900 first copy s1 into v0.
2250define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2251; GFX6-LABEL: s_fshr_i32_5:
2252; GFX6:       ; %bb.0:
2253; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2254; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2255; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2256; GFX6-NEXT:    ; return to shader part epilog
2257;
2258; GFX8-LABEL: s_fshr_i32_5:
2259; GFX8:       ; %bb.0:
2260; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2261; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2262; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2263; GFX8-NEXT:    ; return to shader part epilog
2264;
2265; GFX9-LABEL: s_fshr_i32_5:
2266; GFX9:       ; %bb.0:
2267; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2268; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 5
2269; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2270; GFX9-NEXT:    ; return to shader part epilog
2271;
2272; GFX10-LABEL: s_fshr_i32_5:
2273; GFX10:       ; %bb.0:
2274; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 5
2275; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2276; GFX10-NEXT:    ; return to shader part epilog
2277  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2278  ret i32 %result
2279}
2280
; Same as s_fshr_i32 but with the shift amount fixed to the constant 8.
; The expected code still goes through v_alignbit_b32 with 8 as an inline
; immediate, followed by v_readfirstlane_b32; only gfx1010 avoids the copy of s1.
2281define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2282; GFX6-LABEL: s_fshr_i32_8:
2283; GFX6:       ; %bb.0:
2284; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2285; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2286; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
2287; GFX6-NEXT:    ; return to shader part epilog
2288;
2289; GFX8-LABEL: s_fshr_i32_8:
2290; GFX8:       ; %bb.0:
2291; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2292; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2293; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
2294; GFX8-NEXT:    ; return to shader part epilog
2295;
2296; GFX9-LABEL: s_fshr_i32_8:
2297; GFX9:       ; %bb.0:
2298; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2299; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, 8
2300; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
2301; GFX9-NEXT:    ; return to shader part epilog
2302;
2303; GFX10-LABEL: s_fshr_i32_8:
2304; GFX10:       ; %bb.0:
2305; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, 8
2306; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
2307; GFX10-NEXT:    ; return to shader part epilog
2308  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2309  ret i32 %result
2310}
2311
; Funnel-shift-right of i32 in a default-calling-convention function; the
; expected code takes lhs, rhs and amt from v0, v1 and v2.
; All four targets expect a single v_alignbit_b32 v0, v0, v1, v2; gfx1010
; additionally expects an s_waitcnt_vscnt in the prologue.
2312define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2313; GFX6-LABEL: v_fshr_i32:
2314; GFX6:       ; %bb.0:
2315; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2317; GFX6-NEXT:    s_setpc_b64 s[30:31]
2318;
2319; GFX8-LABEL: v_fshr_i32:
2320; GFX8:       ; %bb.0:
2321; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2322; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2323; GFX8-NEXT:    s_setpc_b64 s[30:31]
2324;
2325; GFX9-LABEL: v_fshr_i32:
2326; GFX9:       ; %bb.0:
2327; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2328; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2329; GFX9-NEXT:    s_setpc_b64 s[30:31]
2330;
2331; GFX10-LABEL: v_fshr_i32:
2332; GFX10:       ; %bb.0:
2333; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2335; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, v2
2336; GFX10-NEXT:    s_setpc_b64 s[30:31]
2337  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2338  ret i32 %result
2339}
2340
; Same as v_fshr_i32 but with the shift amount fixed to the constant 5, which
; the expected v_alignbit_b32 takes as an inline immediate on every target.
2341define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
2342; GFX6-LABEL: v_fshr_i32_5:
2343; GFX6:       ; %bb.0:
2344; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2345; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2346; GFX6-NEXT:    s_setpc_b64 s[30:31]
2347;
2348; GFX8-LABEL: v_fshr_i32_5:
2349; GFX8:       ; %bb.0:
2350; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2351; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2352; GFX8-NEXT:    s_setpc_b64 s[30:31]
2353;
2354; GFX9-LABEL: v_fshr_i32_5:
2355; GFX9:       ; %bb.0:
2356; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2358; GFX9-NEXT:    s_setpc_b64 s[30:31]
2359;
2360; GFX10-LABEL: v_fshr_i32_5:
2361; GFX10:       ; %bb.0:
2362; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2363; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2364; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, 5
2365; GFX10-NEXT:    s_setpc_b64 s[30:31]
2366  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
2367  ret i32 %result
2368}
2369
; Same as v_fshr_i32 but with the shift amount fixed to the constant 8.
; The expected output is still one v_alignbit_b32 with 8 as the immediate
; operand on every target.
2370define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
2371; GFX6-LABEL: v_fshr_i32_8:
2372; GFX6:       ; %bb.0:
2373; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2374; GFX6-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2375; GFX6-NEXT:    s_setpc_b64 s[30:31]
2376;
2377; GFX8-LABEL: v_fshr_i32_8:
2378; GFX8:       ; %bb.0:
2379; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2380; GFX8-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2381; GFX8-NEXT:    s_setpc_b64 s[30:31]
2382;
2383; GFX9-LABEL: v_fshr_i32_8:
2384; GFX9:       ; %bb.0:
2385; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2386; GFX9-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2387; GFX9-NEXT:    s_setpc_b64 s[30:31]
2388;
2389; GFX10-LABEL: v_fshr_i32_8:
2390; GFX10:       ; %bb.0:
2391; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2393; GFX10-NEXT:    v_alignbit_b32 v0, v0, v1, 8
2394; GFX10-NEXT:    s_setpc_b64 s[30:31]
2395  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
2396  ret i32 %result
2397}
2398
; Mixed-operand i32 funnel-shift-right: lhs and rhs are inreg (s0, s1 in the
; expected code) and amt is the only non-inreg operand (v0).
; The i32 result is bitcast to float for the return, and the expected code
; leaves it in v0 with no v_readfirstlane_b32.
; tahiti, fiji and gfx900 copy s1 into v1 before v_alignbit_b32; gfx1010 uses
; both s0 and s1 directly.
2399define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2400; GFX6-LABEL: v_fshr_i32_ssv:
2401; GFX6:       ; %bb.0:
2402; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2403; GFX6-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2404; GFX6-NEXT:    ; return to shader part epilog
2405;
2406; GFX8-LABEL: v_fshr_i32_ssv:
2407; GFX8:       ; %bb.0:
2408; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2409; GFX8-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2410; GFX8-NEXT:    ; return to shader part epilog
2411;
2412; GFX9-LABEL: v_fshr_i32_ssv:
2413; GFX9:       ; %bb.0:
2414; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2415; GFX9-NEXT:    v_alignbit_b32 v0, s0, v1, v0
2416; GFX9-NEXT:    ; return to shader part epilog
2417;
2418; GFX10-LABEL: v_fshr_i32_ssv:
2419; GFX10:       ; %bb.0:
2420; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2421; GFX10-NEXT:    ; return to shader part epilog
2422  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2423  %cast.result = bitcast i32 %result to float
2424  ret float %cast.result
2425}
2426
; Mixed-operand i32 funnel-shift-right: lhs and amt are inreg (s0, s1 in the
; expected code) and rhs is the only non-inreg operand (v0).
; The i32 result is bitcast to float for the return.
; tahiti, fiji and gfx900 copy the amount from s1 into v1 before
; v_alignbit_b32; gfx1010 passes s1 as the amount operand directly.
2427define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2428; GFX6-LABEL: v_fshr_i32_svs:
2429; GFX6:       ; %bb.0:
2430; GFX6-NEXT:    v_mov_b32_e32 v1, s1
2431; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2432; GFX6-NEXT:    ; return to shader part epilog
2433;
2434; GFX8-LABEL: v_fshr_i32_svs:
2435; GFX8:       ; %bb.0:
2436; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2437; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2438; GFX8-NEXT:    ; return to shader part epilog
2439;
2440; GFX9-LABEL: v_fshr_i32_svs:
2441; GFX9:       ; %bb.0:
2442; GFX9-NEXT:    v_mov_b32_e32 v1, s1
2443; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2444; GFX9-NEXT:    ; return to shader part epilog
2445;
2446; GFX10-LABEL: v_fshr_i32_svs:
2447; GFX10:       ; %bb.0:
2448; GFX10-NEXT:    v_alignbit_b32 v0, s0, v0, s1
2449; GFX10-NEXT:    ; return to shader part epilog
2450  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2451  %cast.result = bitcast i32 %result to float
2452  ret float %cast.result
2453}
2454
; i32 funnel-shift-right with the result bitcast to float for the return.
; Despite the _vss suffix, lhs is declared inreg just like rhs and amt, so the
; expected code reads all three from s0, s1 and s2 and matches s_fshr_i32 apart
; from the missing v_readfirstlane_b32.
; NOTE(review): the i16 counterpart v_fshr_i16_vss declares lhs without inreg;
; presumably lhs was meant to be a plain (VGPR) argument here as well. Confirm,
; and if so drop the inreg on lhs and regenerate the assertions with
; utils/update_llc_test_checks.py rather than editing them by hand.
2455define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2456; GFX6-LABEL: v_fshr_i32_vss:
2457; GFX6:       ; %bb.0:
2458; GFX6-NEXT:    v_mov_b32_e32 v0, s1
2459; GFX6-NEXT:    v_mov_b32_e32 v1, s2
2460; GFX6-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2461; GFX6-NEXT:    ; return to shader part epilog
2462;
2463; GFX8-LABEL: v_fshr_i32_vss:
2464; GFX8:       ; %bb.0:
2465; GFX8-NEXT:    v_mov_b32_e32 v0, s1
2466; GFX8-NEXT:    v_mov_b32_e32 v1, s2
2467; GFX8-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2468; GFX8-NEXT:    ; return to shader part epilog
2469;
2470; GFX9-LABEL: v_fshr_i32_vss:
2471; GFX9:       ; %bb.0:
2472; GFX9-NEXT:    v_mov_b32_e32 v0, s1
2473; GFX9-NEXT:    v_mov_b32_e32 v1, s2
2474; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v1
2475; GFX9-NEXT:    ; return to shader part epilog
2476;
2477; GFX10-LABEL: v_fshr_i32_vss:
2478; GFX10:       ; %bb.0:
2479; GFX10-NEXT:    v_mov_b32_e32 v0, s2
2480; GFX10-NEXT:    v_alignbit_b32 v0, s0, s1, v0
2481; GFX10-NEXT:    ; return to shader part epilog
2482  %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2483  %cast.result = bitcast i32 %result to float
2484  ret float %cast.result
2485}
2486
; Funnel-shift-right of <2 x i32> in a default-calling-convention function.
; The expected code handles each element with its own v_alignbit_b32, taking
; lhs from v0-v1, rhs from v2-v3 and amt from v4-v5, identically on all targets.
2487define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
2488; GFX6-LABEL: v_fshr_v2i32:
2489; GFX6:       ; %bb.0:
2490; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2491; GFX6-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2492; GFX6-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2493; GFX6-NEXT:    s_setpc_b64 s[30:31]
2494;
2495; GFX8-LABEL: v_fshr_v2i32:
2496; GFX8:       ; %bb.0:
2497; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2498; GFX8-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2499; GFX8-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2500; GFX8-NEXT:    s_setpc_b64 s[30:31]
2501;
2502; GFX9-LABEL: v_fshr_v2i32:
2503; GFX9:       ; %bb.0:
2504; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2505; GFX9-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2506; GFX9-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2507; GFX9-NEXT:    s_setpc_b64 s[30:31]
2508;
2509; GFX10-LABEL: v_fshr_v2i32:
2510; GFX10:       ; %bb.0:
2511; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2512; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2513; GFX10-NEXT:    v_alignbit_b32 v0, v0, v2, v4
2514; GFX10-NEXT:    v_alignbit_b32 v1, v1, v3, v5
2515; GFX10-NEXT:    s_setpc_b64 s[30:31]
2516  %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
2517  ret <2 x i32> %result
2518}
2519
; Funnel-shift-right of <3 x i32> in a default-calling-convention function.
; The expected code is three independent v_alignbit_b32 instructions, with lhs
; in v0-v2, rhs in v3-v5 and amt in v6-v8, identically on all targets.
2520define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
2521; GFX6-LABEL: v_fshr_v3i32:
2522; GFX6:       ; %bb.0:
2523; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2524; GFX6-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2525; GFX6-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2526; GFX6-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2527; GFX6-NEXT:    s_setpc_b64 s[30:31]
2528;
2529; GFX8-LABEL: v_fshr_v3i32:
2530; GFX8:       ; %bb.0:
2531; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2532; GFX8-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2533; GFX8-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2534; GFX8-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2535; GFX8-NEXT:    s_setpc_b64 s[30:31]
2536;
2537; GFX9-LABEL: v_fshr_v3i32:
2538; GFX9:       ; %bb.0:
2539; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2540; GFX9-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2541; GFX9-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2542; GFX9-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2543; GFX9-NEXT:    s_setpc_b64 s[30:31]
2544;
2545; GFX10-LABEL: v_fshr_v3i32:
2546; GFX10:       ; %bb.0:
2547; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2549; GFX10-NEXT:    v_alignbit_b32 v0, v0, v3, v6
2550; GFX10-NEXT:    v_alignbit_b32 v1, v1, v4, v7
2551; GFX10-NEXT:    v_alignbit_b32 v2, v2, v5, v8
2552; GFX10-NEXT:    s_setpc_b64 s[30:31]
2553  %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
2554  ret <3 x i32> %result
2555}
2556
; Funnel-shift-right of <4 x i32> in a default-calling-convention function.
; The expected code is four independent v_alignbit_b32 instructions, with lhs
; in v0-v3, rhs in v4-v7 and amt in v8-v11, identically on all targets.
2557define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
2558; GFX6-LABEL: v_fshr_v4i32:
2559; GFX6:       ; %bb.0:
2560; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2561; GFX6-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2562; GFX6-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2563; GFX6-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2564; GFX6-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2565; GFX6-NEXT:    s_setpc_b64 s[30:31]
2566;
2567; GFX8-LABEL: v_fshr_v4i32:
2568; GFX8:       ; %bb.0:
2569; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2570; GFX8-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2571; GFX8-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2572; GFX8-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2573; GFX8-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2574; GFX8-NEXT:    s_setpc_b64 s[30:31]
2575;
2576; GFX9-LABEL: v_fshr_v4i32:
2577; GFX9:       ; %bb.0:
2578; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2579; GFX9-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2580; GFX9-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2581; GFX9-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2582; GFX9-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2583; GFX9-NEXT:    s_setpc_b64 s[30:31]
2584;
2585; GFX10-LABEL: v_fshr_v4i32:
2586; GFX10:       ; %bb.0:
2587; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2589; GFX10-NEXT:    v_alignbit_b32 v0, v0, v4, v8
2590; GFX10-NEXT:    v_alignbit_b32 v1, v1, v5, v9
2591; GFX10-NEXT:    v_alignbit_b32 v2, v2, v6, v10
2592; GFX10-NEXT:    v_alignbit_b32 v3, v3, v7, v11
2593; GFX10-NEXT:    s_setpc_b64 s[30:31]
2594  %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
2595  ret <4 x i32> %result
2596}
2597
; Funnel-shift-right of i16 in an amdgpu_ps function with all operands inreg.
; No v_alignbit_b32 is expected here; every target expects a purely scalar
; sequence instead: amt is masked with 15 (s_and_b32) and its complement is
; masked with 15 (s_andn2_b32), lhs is shifted left by 1 and then by the masked
; complement, rhs is shifted right by the masked amt, and the two halves are
; combined with s_or_b32.
; tahiti masks rhs with 0xffff and uses the literal 1 directly; fiji, gfx900 and
; gfx1010 route rhs and the constant 1 through s_bfe_u32 ..., 0x100000.
2598define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
2599; GFX6-LABEL: s_fshr_i16:
2600; GFX6:       ; %bb.0:
2601; GFX6-NEXT:    s_and_b32 s3, s2, 15
2602; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
2603; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
2604; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
2605; GFX6-NEXT:    s_lshl_b32 s0, s0, s2
2606; GFX6-NEXT:    s_bfe_u32 s2, s3, 0x100000
2607; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
2608; GFX6-NEXT:    s_lshr_b32 s1, s1, s2
2609; GFX6-NEXT:    s_or_b32 s0, s0, s1
2610; GFX6-NEXT:    ; return to shader part epilog
2611;
2612; GFX8-LABEL: s_fshr_i16:
2613; GFX8:       ; %bb.0:
2614; GFX8-NEXT:    s_and_b32 s3, s2, 15
2615; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
2616; GFX8-NEXT:    s_bfe_u32 s4, 1, 0x100000
2617; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
2618; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
2619; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2620; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2621; GFX8-NEXT:    s_bfe_u32 s2, s3, 0x100000
2622; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
2623; GFX8-NEXT:    s_or_b32 s0, s0, s1
2624; GFX8-NEXT:    ; return to shader part epilog
2625;
2626; GFX9-LABEL: s_fshr_i16:
2627; GFX9:       ; %bb.0:
2628; GFX9-NEXT:    s_and_b32 s3, s2, 15
2629; GFX9-NEXT:    s_andn2_b32 s2, 15, s2
2630; GFX9-NEXT:    s_bfe_u32 s4, 1, 0x100000
2631; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
2632; GFX9-NEXT:    s_bfe_u32 s2, s2, 0x100000
2633; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2634; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2635; GFX9-NEXT:    s_bfe_u32 s2, s3, 0x100000
2636; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
2637; GFX9-NEXT:    s_or_b32 s0, s0, s1
2638; GFX9-NEXT:    ; return to shader part epilog
2639;
2640; GFX10-LABEL: s_fshr_i16:
2641; GFX10:       ; %bb.0:
2642; GFX10-NEXT:    s_and_b32 s3, s2, 15
2643; GFX10-NEXT:    s_bfe_u32 s4, 1, 0x100000
2644; GFX10-NEXT:    s_andn2_b32 s2, 15, s2
2645; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
2646; GFX10-NEXT:    s_bfe_u32 s2, s2, 0x100000
2647; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2648; GFX10-NEXT:    s_bfe_u32 s3, s3, 0x100000
2649; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2650; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
2651; GFX10-NEXT:    s_or_b32 s0, s0, s1
2652; GFX10-NEXT:    ; return to shader part epilog
2653  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2654  ret i16 %result
2655}
2656
; Same as s_fshr_i16 but with the shift amount fixed to the constant 4.
; The expected code shifts lhs left by 12 and rhs right by 4, then ORs them.
; tahiti uses 12 and 4 as immediates and masks rhs with 0xffff; fiji, gfx900 and
; gfx1010 first materialize 12 and 4 (and extend rhs) via s_bfe_u32.
2657define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
2658; GFX6-LABEL: s_fshr_i16_4:
2659; GFX6:       ; %bb.0:
2660; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
2661; GFX6-NEXT:    s_lshl_b32 s0, s0, 12
2662; GFX6-NEXT:    s_lshr_b32 s1, s1, 4
2663; GFX6-NEXT:    s_or_b32 s0, s0, s1
2664; GFX6-NEXT:    ; return to shader part epilog
2665;
2666; GFX8-LABEL: s_fshr_i16_4:
2667; GFX8:       ; %bb.0:
2668; GFX8-NEXT:    s_bfe_u32 s2, 12, 0x100000
2669; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2670; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2671; GFX8-NEXT:    s_bfe_u32 s2, 4, 0x100000
2672; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
2673; GFX8-NEXT:    s_or_b32 s0, s0, s1
2674; GFX8-NEXT:    ; return to shader part epilog
2675;
2676; GFX9-LABEL: s_fshr_i16_4:
2677; GFX9:       ; %bb.0:
2678; GFX9-NEXT:    s_bfe_u32 s2, 12, 0x100000
2679; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2680; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2681; GFX9-NEXT:    s_bfe_u32 s2, 4, 0x100000
2682; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
2683; GFX9-NEXT:    s_or_b32 s0, s0, s1
2684; GFX9-NEXT:    ; return to shader part epilog
2685;
2686; GFX10-LABEL: s_fshr_i16_4:
2687; GFX10:       ; %bb.0:
2688; GFX10-NEXT:    s_bfe_u32 s2, 12, 0x100000
2689; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2690; GFX10-NEXT:    s_bfe_u32 s3, 4, 0x100000
2691; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2692; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
2693; GFX10-NEXT:    s_or_b32 s0, s0, s1
2694; GFX10-NEXT:    ; return to shader part epilog
2695  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
2696  ret i16 %result
2697}
2698
; Same as s_fshr_i16 but with the shift amount fixed to the constant 5.
; The expected code shifts lhs left by 11 and rhs right by 5, then ORs them.
; tahiti uses 11 and 5 as immediates and masks rhs with 0xffff; fiji, gfx900 and
; gfx1010 first materialize 11 and 5 (and extend rhs) via s_bfe_u32.
2699define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
2700; GFX6-LABEL: s_fshr_i16_5:
2701; GFX6:       ; %bb.0:
2702; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
2703; GFX6-NEXT:    s_lshl_b32 s0, s0, 11
2704; GFX6-NEXT:    s_lshr_b32 s1, s1, 5
2705; GFX6-NEXT:    s_or_b32 s0, s0, s1
2706; GFX6-NEXT:    ; return to shader part epilog
2707;
2708; GFX8-LABEL: s_fshr_i16_5:
2709; GFX8:       ; %bb.0:
2710; GFX8-NEXT:    s_bfe_u32 s2, 11, 0x100000
2711; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2712; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2713; GFX8-NEXT:    s_bfe_u32 s2, 5, 0x100000
2714; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
2715; GFX8-NEXT:    s_or_b32 s0, s0, s1
2716; GFX8-NEXT:    ; return to shader part epilog
2717;
2718; GFX9-LABEL: s_fshr_i16_5:
2719; GFX9:       ; %bb.0:
2720; GFX9-NEXT:    s_bfe_u32 s2, 11, 0x100000
2721; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2722; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2723; GFX9-NEXT:    s_bfe_u32 s2, 5, 0x100000
2724; GFX9-NEXT:    s_lshr_b32 s1, s1, s2
2725; GFX9-NEXT:    s_or_b32 s0, s0, s1
2726; GFX9-NEXT:    ; return to shader part epilog
2727;
2728; GFX10-LABEL: s_fshr_i16_5:
2729; GFX10:       ; %bb.0:
2730; GFX10-NEXT:    s_bfe_u32 s2, 11, 0x100000
2731; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2732; GFX10-NEXT:    s_bfe_u32 s3, 5, 0x100000
2733; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2734; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
2735; GFX10-NEXT:    s_or_b32 s0, s0, s1
2736; GFX10-NEXT:    ; return to shader part epilog
2737  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
2738  ret i16 %result
2739}
2740
; Funnel-shift-right of i16 in a default-calling-convention function; the
; expected code takes lhs, rhs and amt from v0, v1 and v2.
; Every target masks amt and its bitwise inverse (v_xor_b32 with -1) with 15,
; shifts lhs left by 1 and then by the masked inverse, shifts rhs right by the
; masked amt, and ORs the halves.
; tahiti uses 32-bit shifts plus v_bfe_u32 and a 0xffff mask on rhs; fiji,
; gfx900 and gfx1010 use the 16-bit v_lshlrev_b16 / v_lshrrev_b16 instead.
2741define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
2742; GFX6-LABEL: v_fshr_i16:
2743; GFX6:       ; %bb.0:
2744; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2745; GFX6-NEXT:    v_and_b32_e32 v3, 15, v2
2746; GFX6-NEXT:    v_xor_b32_e32 v2, -1, v2
2747; GFX6-NEXT:    v_and_b32_e32 v2, 15, v2
2748; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2749; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
2750; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
2751; GFX6-NEXT:    v_bfe_u32 v2, v3, 0, 16
2752; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2753; GFX6-NEXT:    v_lshrrev_b32_e32 v1, v2, v1
2754; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2755; GFX6-NEXT:    s_setpc_b64 s[30:31]
2756;
2757; GFX8-LABEL: v_fshr_i16:
2758; GFX8:       ; %bb.0:
2759; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2760; GFX8-NEXT:    v_and_b32_e32 v3, 15, v2
2761; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
2762; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
2763; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
2764; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
2765; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
2766; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2767; GFX8-NEXT:    s_setpc_b64 s[30:31]
2768;
2769; GFX9-LABEL: v_fshr_i16:
2770; GFX9:       ; %bb.0:
2771; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2772; GFX9-NEXT:    v_and_b32_e32 v3, 15, v2
2773; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
2774; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
2775; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
2776; GFX9-NEXT:    v_lshlrev_b16_e32 v0, v2, v0
2777; GFX9-NEXT:    v_lshrrev_b16_e32 v1, v3, v1
2778; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2779; GFX9-NEXT:    s_setpc_b64 s[30:31]
2780;
2781; GFX10-LABEL: v_fshr_i16:
2782; GFX10:       ; %bb.0:
2783; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2784; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2785; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
2786; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
2787; GFX10-NEXT:    v_and_b32_e32 v2, 15, v2
2788; GFX10-NEXT:    v_and_b32_e32 v3, 15, v3
2789; GFX10-NEXT:    v_lshrrev_b16 v1, v2, v1
2790; GFX10-NEXT:    v_lshlrev_b16 v0, v3, v0
2791; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
2792; GFX10-NEXT:    s_setpc_b64 s[30:31]
2793  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2794  ret i16 %result
2795}
2796
; Same as v_fshr_i16 but with the shift amount fixed to the constant 4.
; The expected code shifts lhs left by 12 and rhs right by 4, then ORs them.
; tahiti uses 32-bit shifts and masks rhs with 0xffff first; fiji, gfx900 and
; gfx1010 use the 16-bit shift instructions with no extra masking.
2797define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) {
2798; GFX6-LABEL: v_fshr_i16_4:
2799; GFX6:       ; %bb.0:
2800; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2801; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2802; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 12, v0
2803; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 4, v1
2804; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2805; GFX6-NEXT:    s_setpc_b64 s[30:31]
2806;
2807; GFX8-LABEL: v_fshr_i16_4:
2808; GFX8:       ; %bb.0:
2809; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2810; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
2811; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
2812; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2813; GFX8-NEXT:    s_setpc_b64 s[30:31]
2814;
2815; GFX9-LABEL: v_fshr_i16_4:
2816; GFX9:       ; %bb.0:
2817; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2818; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
2819; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 4, v1
2820; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2821; GFX9-NEXT:    s_setpc_b64 s[30:31]
2822;
2823; GFX10-LABEL: v_fshr_i16_4:
2824; GFX10:       ; %bb.0:
2825; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2826; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2827; GFX10-NEXT:    v_lshlrev_b16 v0, 12, v0
2828; GFX10-NEXT:    v_lshrrev_b16 v1, 4, v1
2829; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
2830; GFX10-NEXT:    s_setpc_b64 s[30:31]
2831  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
2832  ret i16 %result
2833}
2834
; Same as v_fshr_i16 but with the shift amount fixed to the constant 5.
; The expected code shifts lhs left by 11 and rhs right by 5, then ORs them.
; tahiti uses 32-bit shifts and masks rhs with 0xffff first; fiji, gfx900 and
; gfx1010 use the 16-bit shift instructions with no extra masking.
2835define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) {
2836; GFX6-LABEL: v_fshr_i16_5:
2837; GFX6:       ; %bb.0:
2838; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2839; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2840; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 11, v0
2841; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 5, v1
2842; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2843; GFX6-NEXT:    s_setpc_b64 s[30:31]
2844;
2845; GFX8-LABEL: v_fshr_i16_5:
2846; GFX8:       ; %bb.0:
2847; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2848; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
2849; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
2850; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2851; GFX8-NEXT:    s_setpc_b64 s[30:31]
2852;
2853; GFX9-LABEL: v_fshr_i16_5:
2854; GFX9:       ; %bb.0:
2855; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2856; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 11, v0
2857; GFX9-NEXT:    v_lshrrev_b16_e32 v1, 5, v1
2858; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2859; GFX9-NEXT:    s_setpc_b64 s[30:31]
2860;
2861; GFX10-LABEL: v_fshr_i16_5:
2862; GFX10:       ; %bb.0:
2863; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2864; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2865; GFX10-NEXT:    v_lshlrev_b16 v0, 11, v0
2866; GFX10-NEXT:    v_lshrrev_b16 v1, 5, v1
2867; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
2868; GFX10-NEXT:    s_setpc_b64 s[30:31]
2869  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
2870  ret i16 %result
2871}
2872
; Mixed-operand i16 funnel-shift-right: lhs and rhs are inreg (s0, s1 in the
; expected code) and amt is the only non-inreg operand (v0). The i16 result is
; bitcast to half for the return.
; The amount masking is done with vector instructions; the initial shift of lhs
; by 1 stays scalar (s_lshl_b32) on every target.
; tahiti finishes with 32-bit v_lshl_b32 / v_lshr_b32 after masking rhs with
; 0xffff; fiji, gfx900 and gfx1010 use 16-bit shifts that take s0 / s1 directly.
2873define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
2874; GFX6-LABEL: v_fshr_i16_ssv:
2875; GFX6:       ; %bb.0:
2876; GFX6-NEXT:    v_and_b32_e32 v1, 15, v0
2877; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
2878; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
2879; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
2880; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
2881; GFX6-NEXT:    v_lshl_b32_e32 v0, s0, v0
2882; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
2883; GFX6-NEXT:    s_and_b32 s0, s1, 0xffff
2884; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
2885; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
2886; GFX6-NEXT:    ; return to shader part epilog
2887;
2888; GFX8-LABEL: v_fshr_i16_ssv:
2889; GFX8:       ; %bb.0:
2890; GFX8-NEXT:    v_and_b32_e32 v1, 15, v0
2891; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
2892; GFX8-NEXT:    s_bfe_u32 s2, 1, 0x100000
2893; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
2894; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
2895; GFX8-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
2896; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
2897; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
2898; GFX8-NEXT:    ; return to shader part epilog
2899;
2900; GFX9-LABEL: v_fshr_i16_ssv:
2901; GFX9:       ; %bb.0:
2902; GFX9-NEXT:    v_and_b32_e32 v1, 15, v0
2903; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
2904; GFX9-NEXT:    s_bfe_u32 s2, 1, 0x100000
2905; GFX9-NEXT:    v_and_b32_e32 v0, 15, v0
2906; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
2907; GFX9-NEXT:    v_lshlrev_b16_e64 v0, v0, s0
2908; GFX9-NEXT:    v_lshrrev_b16_e64 v1, v1, s1
2909; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
2910; GFX9-NEXT:    ; return to shader part epilog
2911;
2912; GFX10-LABEL: v_fshr_i16_ssv:
2913; GFX10:       ; %bb.0:
2914; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
2915; GFX10-NEXT:    v_and_b32_e32 v0, 15, v0
2916; GFX10-NEXT:    s_bfe_u32 s2, 1, 0x100000
2917; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
2918; GFX10-NEXT:    v_and_b32_e32 v1, 15, v1
2919; GFX10-NEXT:    v_lshrrev_b16 v0, v0, s1
2920; GFX10-NEXT:    v_lshlrev_b16 v1, v1, s0
2921; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
2922; GFX10-NEXT:    ; return to shader part epilog
2923  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2924  %cast.result = bitcast i16 %result to half
2925  ret half %cast.result
2926}
2927
; Mixed-operand i16 funnel-shift-right: lhs and amt are inreg (s0, s1 in the
; expected code) and rhs is the only non-inreg operand (v0). The i16 result is
; bitcast to half for the return.
; The amount masking and the whole lhs half (shift by 1, then by the masked
; inverted amount) are expected as scalar instructions; only the right shift of
; rhs and the final v_or_b32 are vector instructions.
; tahiti masks rhs with 0xffff and uses a 32-bit shift; fiji, gfx900 and
; gfx1010 use v_lshrrev_b16 with the scalar amount directly.
2928define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
2929; GFX6-LABEL: v_fshr_i16_svs:
2930; GFX6:       ; %bb.0:
2931; GFX6-NEXT:    s_and_b32 s2, s1, 15
2932; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
2933; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
2934; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
2935; GFX6-NEXT:    s_lshl_b32 s0, s0, s1
2936; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
2937; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2938; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s1, v0
2939; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
2940; GFX6-NEXT:    ; return to shader part epilog
2941;
2942; GFX8-LABEL: v_fshr_i16_svs:
2943; GFX8:       ; %bb.0:
2944; GFX8-NEXT:    s_and_b32 s2, s1, 15
2945; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
2946; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
2947; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
2948; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
2949; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
2950; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
2951; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
2952; GFX8-NEXT:    ; return to shader part epilog
2953;
2954; GFX9-LABEL: v_fshr_i16_svs:
2955; GFX9:       ; %bb.0:
2956; GFX9-NEXT:    s_and_b32 s2, s1, 15
2957; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
2958; GFX9-NEXT:    s_bfe_u32 s3, 1, 0x100000
2959; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
2960; GFX9-NEXT:    s_bfe_u32 s1, s1, 0x100000
2961; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
2962; GFX9-NEXT:    v_lshrrev_b16_e32 v0, s2, v0
2963; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
2964; GFX9-NEXT:    ; return to shader part epilog
2965;
2966; GFX10-LABEL: v_fshr_i16_svs:
2967; GFX10:       ; %bb.0:
2968; GFX10-NEXT:    s_and_b32 s2, s1, 15
2969; GFX10-NEXT:    s_bfe_u32 s3, 1, 0x100000
2970; GFX10-NEXT:    s_andn2_b32 s1, 15, s1
2971; GFX10-NEXT:    v_lshrrev_b16 v0, s2, v0
2972; GFX10-NEXT:    s_lshl_b32 s0, s0, s3
2973; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
2974; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
2975; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
2976; GFX10-NEXT:    ; return to shader part epilog
2977  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
2978  %cast.result = bitcast i16 %result to half
2979  ret half %cast.result
2980}
2981
; Scalar i16 funnel shift right (llvm.fshr.i16) with mixed operand placement:
; %rhs and %amt are marked inreg, %lhs is not. In the expected output below,
; %lhs is read from v0, %rhs from s0 and %amt from s1.
; The left shift of %lhs (by 1, then by 15 & ~%amt) is done with vector
; instructions, while the right shift of %rhs by (%amt & 15) is done with
; s_lshr_b32 after %rhs is narrowed (0xffff mask on the tahiti run, s_bfe_u32
; with 0x100000 on the others); the parts are OR'd into v0.
; The i16 result is bitcast to half for the return; presumably this keeps the
; amdgpu_ps return value in a VGPR -- TODO confirm.
; The check lines are autogenerated (see the note at the top of the file); do
; not edit them by hand.
2982define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
2983; GFX6-LABEL: v_fshr_i16_vss:
2984; GFX6:       ; %bb.0:
2985; GFX6-NEXT:    s_and_b32 s2, s1, 15
2986; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
2987; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
2988; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
2989; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s1, v0
2990; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
2991; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
2992; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
2993; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
2994; GFX6-NEXT:    ; return to shader part epilog
2995;
2996; GFX8-LABEL: v_fshr_i16_vss:
2997; GFX8:       ; %bb.0:
2998; GFX8-NEXT:    s_and_b32 s2, s1, 15
2999; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3000; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3001; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3002; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3003; GFX8-NEXT:    s_bfe_u32 s1, s2, 0x100000
3004; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3005; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3006; GFX8-NEXT:    ; return to shader part epilog
3007;
3008; GFX9-LABEL: v_fshr_i16_vss:
3009; GFX9:       ; %bb.0:
3010; GFX9-NEXT:    s_and_b32 s2, s1, 15
3011; GFX9-NEXT:    s_andn2_b32 s1, 15, s1
3012; GFX9-NEXT:    v_lshlrev_b16_e32 v0, 1, v0
3013; GFX9-NEXT:    v_lshlrev_b16_e32 v0, s1, v0
3014; GFX9-NEXT:    s_bfe_u32 s0, s0, 0x100000
3015; GFX9-NEXT:    s_bfe_u32 s1, s2, 0x100000
3016; GFX9-NEXT:    s_lshr_b32 s0, s0, s1
3017; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3018; GFX9-NEXT:    ; return to shader part epilog
3019;
3020; GFX10-LABEL: v_fshr_i16_vss:
3021; GFX10:       ; %bb.0:
3022; GFX10-NEXT:    v_lshlrev_b16 v0, 1, v0
3023; GFX10-NEXT:    s_andn2_b32 s2, 15, s1
3024; GFX10-NEXT:    s_and_b32 s1, s1, 15
3025; GFX10-NEXT:    s_bfe_u32 s0, s0, 0x100000
3026; GFX10-NEXT:    s_bfe_u32 s1, s1, 0x100000
3027; GFX10-NEXT:    v_lshlrev_b16 v0, s2, v0
3028; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
3029; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3030; GFX10-NEXT:    ; return to shader part epilog
3031  %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3032  %cast.result = bitcast i16 %result to half
3033  ret half %cast.result
3034}
3035
; <2 x i16> funnel shift right (llvm.fshr.v2i16) with all three operands
; marked inreg. The expected output for every target consists solely of
; scalar (s_*) instructions and leaves the result in s0.
; The tahiti run reads six input SGPRs (s0-s5) and first assembles the amount
; from s4/s5 with a shift by 16 and an OR; the other runs read only s0-s2 and
; split the halves with 16-bit right shifts. The gfx900 and gfx1010 runs mask
; the amount with 0xf000f and rebuild packed values with s_pack_ll_b32_b16.
; The <2 x i16> result is bitcast to i32 for the return.
; The check lines are autogenerated (see the note at the top of the file); do
; not edit them by hand.
3036define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3037; GFX6-LABEL: s_fshr_v2i16:
3038; GFX6:       ; %bb.0:
3039; GFX6-NEXT:    s_mov_b32 s6, 0xffff
3040; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
3041; GFX6-NEXT:    s_and_b32 s4, s4, s6
3042; GFX6-NEXT:    s_or_b32 s4, s5, s4
3043; GFX6-NEXT:    s_bfe_u32 s5, 1, 0x100000
3044; GFX6-NEXT:    s_lshl_b32 s0, s0, s5
3045; GFX6-NEXT:    s_and_b32 s7, s2, s6
3046; GFX6-NEXT:    s_lshl_b32 s1, s1, s5
3047; GFX6-NEXT:    s_and_b32 s5, s3, s6
3048; GFX6-NEXT:    s_lshr_b32 s7, s7, 15
3049; GFX6-NEXT:    s_lshr_b32 s5, s5, 15
3050; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3051; GFX6-NEXT:    s_xor_b32 s4, s4, -1
3052; GFX6-NEXT:    s_or_b32 s0, s0, s7
3053; GFX6-NEXT:    s_or_b32 s1, s1, s5
3054; GFX6-NEXT:    s_lshr_b32 s5, s4, 16
3055; GFX6-NEXT:    s_and_b32 s7, s4, 15
3056; GFX6-NEXT:    s_andn2_b32 s4, 15, s4
3057; GFX6-NEXT:    s_and_b32 s2, s2, s6
3058; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
3059; GFX6-NEXT:    s_lshr_b32 s2, s2, 1
3060; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
3061; GFX6-NEXT:    s_lshl_b32 s0, s0, s7
3062; GFX6-NEXT:    s_lshr_b32 s2, s2, s4
3063; GFX6-NEXT:    s_or_b32 s0, s0, s2
3064; GFX6-NEXT:    s_and_b32 s2, s5, 15
3065; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3066; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3067; GFX6-NEXT:    s_andn2_b32 s4, 15, s5
3068; GFX6-NEXT:    s_lshl_b32 s1, s1, s2
3069; GFX6-NEXT:    s_and_b32 s2, s3, s6
3070; GFX6-NEXT:    s_lshr_b32 s2, s2, 1
3071; GFX6-NEXT:    s_bfe_u32 s3, s4, 0x100000
3072; GFX6-NEXT:    s_lshr_b32 s2, s2, s3
3073; GFX6-NEXT:    s_or_b32 s1, s1, s2
3074; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3075; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3076; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3077; GFX6-NEXT:    s_or_b32 s0, s0, s1
3078; GFX6-NEXT:    ; return to shader part epilog
3079;
3080; GFX8-LABEL: s_fshr_v2i16:
3081; GFX8:       ; %bb.0:
3082; GFX8-NEXT:    s_bfe_u32 s5, 1, 0x100000
3083; GFX8-NEXT:    s_bfe_u32 s6, s1, 0x100000
3084; GFX8-NEXT:    s_bfe_u32 s7, 15, 0x100000
3085; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
3086; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3087; GFX8-NEXT:    s_lshl_b32 s0, s0, s5
3088; GFX8-NEXT:    s_lshr_b32 s6, s6, s7
3089; GFX8-NEXT:    s_or_b32 s0, s0, s6
3090; GFX8-NEXT:    s_lshl_b32 s3, s3, s5
3091; GFX8-NEXT:    s_lshr_b32 s6, s4, s7
3092; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
3093; GFX8-NEXT:    s_xor_b32 s2, s2, -1
3094; GFX8-NEXT:    s_or_b32 s3, s3, s6
3095; GFX8-NEXT:    s_lshr_b32 s6, s2, 16
3096; GFX8-NEXT:    s_and_b32 s7, s2, 15
3097; GFX8-NEXT:    s_andn2_b32 s2, 15, s2
3098; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3099; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
3100; GFX8-NEXT:    s_lshr_b32 s1, s1, s5
3101; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3102; GFX8-NEXT:    s_lshl_b32 s0, s0, s7
3103; GFX8-NEXT:    s_lshr_b32 s1, s1, s2
3104; GFX8-NEXT:    s_or_b32 s0, s0, s1
3105; GFX8-NEXT:    s_and_b32 s1, s6, 15
3106; GFX8-NEXT:    s_lshl_b32 s4, s4, s5
3107; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3108; GFX8-NEXT:    s_andn2_b32 s2, 15, s6
3109; GFX8-NEXT:    s_lshl_b32 s1, s3, s1
3110; GFX8-NEXT:    s_bfe_u32 s3, s4, 0x100000
3111; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
3112; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3113; GFX8-NEXT:    s_lshr_b32 s2, s3, s2
3114; GFX8-NEXT:    s_or_b32 s1, s1, s2
3115; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3116; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3117; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
3118; GFX8-NEXT:    s_or_b32 s0, s0, s1
3119; GFX8-NEXT:    ; return to shader part epilog
3120;
3121; GFX9-LABEL: s_fshr_v2i16:
3122; GFX9:       ; %bb.0:
3123; GFX9-NEXT:    s_mov_b32 s3, 0xf000f
3124; GFX9-NEXT:    s_and_b32 s4, s2, s3
3125; GFX9-NEXT:    s_andn2_b32 s2, s3, s2
3126; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
3127; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3128; GFX9-NEXT:    s_lshl_b32 s3, s3, 1
3129; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3130; GFX9-NEXT:    s_lshr_b32 s3, s0, 16
3131; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
3132; GFX9-NEXT:    s_lshl_b32 s0, s0, s2
3133; GFX9-NEXT:    s_lshl_b32 s2, s3, s5
3134; GFX9-NEXT:    s_mov_b32 s3, 0xffff
3135; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3136; GFX9-NEXT:    s_lshr_b32 s2, s1, 16
3137; GFX9-NEXT:    s_and_b32 s1, s1, s3
3138; GFX9-NEXT:    s_lshr_b32 s5, s4, 16
3139; GFX9-NEXT:    s_and_b32 s3, s4, s3
3140; GFX9-NEXT:    s_lshr_b32 s1, s1, s3
3141; GFX9-NEXT:    s_lshr_b32 s2, s2, s5
3142; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s2
3143; GFX9-NEXT:    s_or_b32 s0, s0, s1
3144; GFX9-NEXT:    ; return to shader part epilog
3145;
3146; GFX10-LABEL: s_fshr_v2i16:
3147; GFX10:       ; %bb.0:
3148; GFX10-NEXT:    s_lshr_b32 s4, s0, 16
3149; GFX10-NEXT:    s_mov_b32 s3, 0xf000f
3150; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3151; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
3152; GFX10-NEXT:    s_and_b32 s5, s2, s3
3153; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3154; GFX10-NEXT:    s_andn2_b32 s2, s3, s2
3155; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3156; GFX10-NEXT:    s_lshr_b32 s4, s2, 16
3157; GFX10-NEXT:    s_lshl_b32 s0, s0, s2
3158; GFX10-NEXT:    s_lshl_b32 s2, s3, s4
3159; GFX10-NEXT:    s_mov_b32 s3, 0xffff
3160; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
3161; GFX10-NEXT:    s_and_b32 s1, s1, s3
3162; GFX10-NEXT:    s_and_b32 s3, s5, s3
3163; GFX10-NEXT:    s_lshr_b32 s5, s5, 16
3164; GFX10-NEXT:    s_lshr_b32 s1, s1, s3
3165; GFX10-NEXT:    s_lshr_b32 s3, s4, s5
3166; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3167; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s3
3168; GFX10-NEXT:    s_or_b32 s0, s0, s1
3169; GFX10-NEXT:    ; return to shader part epilog
3170  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3171  %cast = bitcast <2 x i16> %result to i32
3172  ret i32 %cast
3173}
3174
; <2 x i16> funnel shift right (llvm.fshr.v2i16) in a function without the
; amdgpu_ps calling convention and without inreg operands. The expected
; output starts with s_waitcnt and returns through s_setpc_b64 s[30:31].
; The tahiti run handles the two elements in separate registers (lhs v0/v1,
; rhs v2/v3, amount v4/v5) and leaves the results in v0 and v1. The fiji run
; works on packed registers and uses SDWA forms to reach the high halves. The
; gfx900 and gfx1010 runs mask the amount with 0xf000f and use the packed
; v_pk_lshlrev_b16 / v_pk_lshrrev_b16 shifts.
; The <2 x i16> result is returned directly, with no bitcast.
; The check lines are autogenerated (see the note at the top of the file); do
; not edit them by hand.
3175define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3176; GFX6-LABEL: v_fshr_v2i16:
3177; GFX6:       ; %bb.0:
3178; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3179; GFX6-NEXT:    v_mov_b32_e32 v6, 0xffff
3180; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
3181; GFX6-NEXT:    v_and_b32_e32 v4, v4, v6
3182; GFX6-NEXT:    s_mov_b32 s5, 0xffff
3183; GFX6-NEXT:    v_or_b32_e32 v4, v5, v4
3184; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
3185; GFX6-NEXT:    v_and_b32_e32 v5, s5, v2
3186; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
3187; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 15, v5
3188; GFX6-NEXT:    v_or_b32_e32 v0, v0, v5
3189; GFX6-NEXT:    v_and_b32_e32 v5, s5, v3
3190; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
3191; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 15, v5
3192; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3193; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
3194; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 1, v2
3195; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
3196; GFX6-NEXT:    v_and_b32_e32 v7, 15, v4
3197; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
3198; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3199; GFX6-NEXT:    v_and_b32_e32 v2, v2, v6
3200; GFX6-NEXT:    v_bfe_u32 v7, v7, 0, 16
3201; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3202; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
3203; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v7, v0
3204; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
3205; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3206; GFX6-NEXT:    v_and_b32_e32 v2, 15, v5
3207; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
3208; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v5
3209; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3210; GFX6-NEXT:    v_and_b32_e32 v4, 15, v4
3211; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v2, v1
3212; GFX6-NEXT:    v_and_b32_e32 v2, v3, v6
3213; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3214; GFX6-NEXT:    v_bfe_u32 v3, v4, 0, 16
3215; GFX6-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
3216; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3217; GFX6-NEXT:    s_setpc_b64 s[30:31]
3218;
3219; GFX8-LABEL: v_fshr_v2i16:
3220; GFX8:       ; %bb.0:
3221; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3222; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
3223; GFX8-NEXT:    v_lshrrev_b16_e32 v4, 15, v1
3224; GFX8-NEXT:    v_or_b32_e32 v3, v3, v4
3225; GFX8-NEXT:    v_mov_b32_e32 v4, 1
3226; GFX8-NEXT:    v_mov_b32_e32 v5, 15
3227; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3228; GFX8-NEXT:    v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3229; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3230; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
3231; GFX8-NEXT:    v_lshlrev_b16_e32 v5, 1, v1
3232; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3233; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
3234; GFX8-NEXT:    v_and_b32_e32 v6, 15, v2
3235; GFX8-NEXT:    v_xor_b32_e32 v2, -1, v2
3236; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
3237; GFX8-NEXT:    v_lshrrev_b16_e32 v5, 1, v5
3238; GFX8-NEXT:    v_lshlrev_b16_e32 v3, v6, v3
3239; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v2, v5
3240; GFX8-NEXT:    v_or_b32_e32 v2, v3, v2
3241; GFX8-NEXT:    v_and_b32_e32 v3, 15, v4
3242; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
3243; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
3244; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 1, v1
3245; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v3, v0
3246; GFX8-NEXT:    v_lshrrev_b16_e32 v1, v4, v1
3247; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
3248; GFX8-NEXT:    v_mov_b32_e32 v1, 16
3249; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3250; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3251; GFX8-NEXT:    s_setpc_b64 s[30:31]
3252;
3253; GFX9-LABEL: v_fshr_v2i16:
3254; GFX9:       ; %bb.0:
3255; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3256; GFX9-NEXT:    s_mov_b32 s4, 0xf000f
3257; GFX9-NEXT:    v_and_b32_e32 v3, s4, v2
3258; GFX9-NEXT:    v_xor_b32_e32 v2, -1, v2
3259; GFX9-NEXT:    v_and_b32_e32 v2, s4, v2
3260; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3261; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
3262; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
3263; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3264; GFX9-NEXT:    s_setpc_b64 s[30:31]
3265;
3266; GFX10-LABEL: v_fshr_v2i16:
3267; GFX10:       ; %bb.0:
3268; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3269; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3270; GFX10-NEXT:    v_xor_b32_e32 v3, -1, v2
3271; GFX10-NEXT:    s_mov_b32 s4, 0xf000f
3272; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3273; GFX10-NEXT:    v_and_b32_e32 v2, s4, v2
3274; GFX10-NEXT:    v_and_b32_e32 v3, s4, v3
3275; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3276; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
3277; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3278; GFX10-NEXT:    s_setpc_b64 s[30:31]
3279  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3280  ret <2 x i16> %result
3281}
3282
; <2 x i16> funnel shift right (llvm.fshr.v2i16) with a constant per-element
; amount of <4, 8>; the function takes only %lhs and %rhs.
; The tahiti and fiji expected output uses constant shift amounts directly
; (left shifts by 12 and 8; right shifts by 1 then 3 and 1 then 7 on tahiti,
; by 4 and 8 on fiji).
; The gfx900 and gfx1010 expected output instead contains a reciprocal /
; multiply-high / compare-select sequence over the constants 4, 8 and 16,
; followed by a packed subtract from 16 and the packed shifts.
; NOTE(review) -- that sequence looks like an unfolded remainder of the
; constant amounts by 16, i.e. a missed constant fold in codegen; confirm
; before relying on it. The checks record current llc output as-is.
; The check lines are autogenerated (see the note at the top of the file); do
; not edit them by hand.
3283define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
3284; GFX6-LABEL: v_fshr_v2i16_4_8:
3285; GFX6:       ; %bb.0:
3286; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3287; GFX6-NEXT:    s_bfe_u32 s4, 12, 0x100000
3288; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
3289; GFX6-NEXT:    s_mov_b32 s4, 0xffff
3290; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
3291; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3292; GFX6-NEXT:    s_bfe_u32 s5, 3, 0x100000
3293; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s5, v2
3294; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
3295; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
3296; GFX6-NEXT:    s_bfe_u32 s5, 8, 0x100000
3297; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v2
3298; GFX6-NEXT:    s_bfe_u32 s4, 7, 0x100000
3299; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s5, v1
3300; GFX6-NEXT:    v_lshrrev_b32_e32 v2, s4, v2
3301; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
3302; GFX6-NEXT:    s_setpc_b64 s[30:31]
3303;
3304; GFX8-LABEL: v_fshr_v2i16_4_8:
3305; GFX8:       ; %bb.0:
3306; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
3308; GFX8-NEXT:    v_lshlrev_b16_e32 v0, 12, v0
3309; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 4, v1
3310; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
3311; GFX8-NEXT:    v_mov_b32_e32 v3, 8
3312; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 8, v2
3313; GFX8-NEXT:    v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3314; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
3315; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3316; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3317; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3318; GFX8-NEXT:    s_setpc_b64 s[30:31]
3319;
3320; GFX9-LABEL: v_fshr_v2i16_4_8:
3321; GFX9:       ; %bb.0:
3322; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v2, 16
3324; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
3325; GFX9-NEXT:    v_cvt_f32_ubyte0_e32 v3, 16
3326; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v3
3327; GFX9-NEXT:    s_mov_b32 s4, 0x4f7ffffe
3328; GFX9-NEXT:    v_mul_f32_e32 v2, s4, v2
3329; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
3330; GFX9-NEXT:    v_mul_f32_e32 v3, s4, v3
3331; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
3332; GFX9-NEXT:    v_mul_lo_u32 v4, -16, v2
3333; GFX9-NEXT:    v_mul_lo_u32 v5, -16, v3
3334; GFX9-NEXT:    v_mul_hi_u32 v4, v2, v4
3335; GFX9-NEXT:    v_mul_hi_u32 v5, v3, v5
3336; GFX9-NEXT:    v_add_u32_e32 v2, v2, v4
3337; GFX9-NEXT:    v_mul_hi_u32 v2, 4, v2
3338; GFX9-NEXT:    v_add_u32_e32 v3, v3, v5
3339; GFX9-NEXT:    v_mul_hi_u32 v3, 8, v3
3340; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
3341; GFX9-NEXT:    v_sub_u32_e32 v2, 4, v2
3342; GFX9-NEXT:    v_subrev_u32_e32 v4, 16, v2
3343; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 16, v2
3344; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 4, v3
3345; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
3346; GFX9-NEXT:    v_subrev_u32_e32 v4, 16, v2
3347; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 16, v2
3348; GFX9-NEXT:    v_sub_u32_e32 v3, 8, v3
3349; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
3350; GFX9-NEXT:    v_subrev_u32_e32 v4, 16, v3
3351; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 16, v3
3352; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
3353; GFX9-NEXT:    v_subrev_u32_e32 v4, 16, v3
3354; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, 16, v3
3355; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
3356; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
3357; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
3358; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
3359; GFX9-NEXT:    v_pk_sub_i16 v3, 16, v2 op_sel_hi:[0,1]
3360; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
3361; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3362; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3363; GFX9-NEXT:    s_setpc_b64 s[30:31]
3364;
3365; GFX10-LABEL: v_fshr_v2i16_4_8:
3366; GFX10:       ; %bb.0:
3367; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3368; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
3369; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v2, 16
3370; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v3, 16
3371; GFX10-NEXT:    s_mov_b32 s4, 0x4f7ffffe
3372; GFX10-NEXT:    v_rcp_iflag_f32_e32 v2, v2
3373; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
3374; GFX10-NEXT:    v_mul_f32_e32 v2, s4, v2
3375; GFX10-NEXT:    v_mul_f32_e32 v3, s4, v3
3376; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
3377; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
3378; GFX10-NEXT:    v_mul_lo_u32 v4, -16, v2
3379; GFX10-NEXT:    v_mul_lo_u32 v5, -16, v3
3380; GFX10-NEXT:    v_mul_hi_u32 v4, v2, v4
3381; GFX10-NEXT:    v_mul_hi_u32 v5, v3, v5
3382; GFX10-NEXT:    v_add_nc_u32_e32 v2, v2, v4
3383; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v5
3384; GFX10-NEXT:    v_mul_hi_u32 v2, 8, v2
3385; GFX10-NEXT:    v_mul_hi_u32 v3, 4, v3
3386; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 4, v2
3387; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 4, v3
3388; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 8, v2
3389; GFX10-NEXT:    v_sub_nc_u32_e32 v3, 4, v3
3390; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, 16, v2
3391; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v2
3392; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, 16, v3
3393; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc_lo
3394; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v3
3395; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, 16, v2
3396; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc_lo
3397; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v2
3398; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, 16, v3
3399; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc_lo
3400; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v3
3401; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
3402; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc_lo
3403; GFX10-NEXT:    v_and_or_b32 v2, 0xffff, v3, v2
3404; GFX10-NEXT:    v_pk_sub_i16 v3, 16, v2 op_sel_hi:[0,1]
3405; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
3406; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v3, v0
3407; GFX10-NEXT:    v_or_b32_e32 v0, v0, v1
3408; GFX10-NEXT:    s_setpc_b64 s[30:31]
3409  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
3410  ret <2 x i16> %result
3411}
3412
; <2 x i16> funnel shift right (llvm.fshr.v2i16) with mixed operand placement:
; %lhs and %rhs are marked inreg, %amt is not.
; In the tahiti expected output the amount arrives in v0/v1 and is packed with
; a shift by 16 and an OR, while the data operands are read from s0-s3. In the
; other runs the amount is read from v0 and the data operands from s0 and s1.
; The gfx900 and gfx1010 runs mask the amount with 0xf000f and finish with the
; packed v_pk_lshlrev_b16 / v_pk_lshrrev_b16 shifts.
; The <2 x i16> result is bitcast to float for the return; presumably this
; keeps the amdgpu_ps return value in a VGPR -- TODO confirm.
; The check lines are autogenerated (see the note at the top of the file); do
; not edit them by hand.
3413define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
3414; GFX6-LABEL: v_fshr_v2i16_ssv:
3415; GFX6:       ; %bb.0:
3416; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3417; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
3418; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
3419; GFX6-NEXT:    s_mov_b32 s5, 0xffff
3420; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
3421; GFX6-NEXT:    s_and_b32 s6, s2, s5
3422; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3423; GFX6-NEXT:    s_lshl_b32 s0, s0, s4
3424; GFX6-NEXT:    s_lshr_b32 s6, s6, 15
3425; GFX6-NEXT:    v_and_b32_e32 v2, 15, v0
3426; GFX6-NEXT:    s_or_b32 s0, s0, s6
3427; GFX6-NEXT:    s_lshl_b32 s2, s2, 1
3428; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
3429; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
3430; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3431; GFX6-NEXT:    v_and_b32_e32 v0, 15, v0
3432; GFX6-NEXT:    v_lshl_b32_e32 v2, s0, v2
3433; GFX6-NEXT:    s_and_b32 s0, s2, s5
3434; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
3435; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3436; GFX6-NEXT:    v_lshr_b32_e32 v0, s0, v0
3437; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
3438; GFX6-NEXT:    s_and_b32 s4, s3, s5
3439; GFX6-NEXT:    s_lshl_b32 s3, s3, 1
3440; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
3441; GFX6-NEXT:    v_and_b32_e32 v2, 15, v1
3442; GFX6-NEXT:    v_xor_b32_e32 v1, -1, v1
3443; GFX6-NEXT:    s_lshr_b32 s4, s4, 15
3444; GFX6-NEXT:    v_and_b32_e32 v1, 15, v1
3445; GFX6-NEXT:    s_and_b32 s0, s3, s5
3446; GFX6-NEXT:    s_or_b32 s1, s1, s4
3447; GFX6-NEXT:    v_bfe_u32 v2, v2, 0, 16
3448; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
3449; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3450; GFX6-NEXT:    v_lshl_b32_e32 v2, s1, v2
3451; GFX6-NEXT:    v_lshr_b32_e32 v1, s0, v1
3452; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
3453; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3454; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3455; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3456; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3457; GFX6-NEXT:    ; return to shader part epilog
3458;
3459; GFX8-LABEL: v_fshr_v2i16_ssv:
3460; GFX8:       ; %bb.0:
3461; GFX8-NEXT:    s_bfe_u32 s4, 1, 0x100000
3462; GFX8-NEXT:    s_bfe_u32 s5, s1, 0x100000
3463; GFX8-NEXT:    s_bfe_u32 s6, 15, 0x100000
3464; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3465; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
3466; GFX8-NEXT:    s_lshr_b32 s5, s5, s6
3467; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3468; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
3469; GFX8-NEXT:    s_or_b32 s0, s0, s5
3470; GFX8-NEXT:    s_lshl_b32 s1, s1, s4
3471; GFX8-NEXT:    v_and_b32_e32 v2, 15, v0
3472; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
3473; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
3474; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s0
3475; GFX8-NEXT:    s_bfe_u32 s0, s1, 0x100000
3476; GFX8-NEXT:    v_and_b32_e32 v0, 15, v0
3477; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
3478; GFX8-NEXT:    s_lshr_b32 s5, s3, s6
3479; GFX8-NEXT:    s_lshl_b32 s3, s3, s4
3480; GFX8-NEXT:    v_lshrrev_b16_e64 v0, v0, s0
3481; GFX8-NEXT:    s_lshl_b32 s2, s2, s4
3482; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
3483; GFX8-NEXT:    v_and_b32_e32 v2, 15, v1
3484; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v1
3485; GFX8-NEXT:    s_bfe_u32 s0, s3, 0x100000
3486; GFX8-NEXT:    s_or_b32 s2, s2, s5
3487; GFX8-NEXT:    v_and_b32_e32 v1, 15, v1
3488; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
3489; GFX8-NEXT:    v_lshlrev_b16_e64 v2, v2, s2
3490; GFX8-NEXT:    v_lshrrev_b16_e64 v1, v1, s0
3491; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
3492; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3493; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3494; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3495; GFX8-NEXT:    ; return to shader part epilog
3496;
3497; GFX9-LABEL: v_fshr_v2i16_ssv:
3498; GFX9:       ; %bb.0:
3499; GFX9-NEXT:    s_mov_b32 s2, 0xf000f
3500; GFX9-NEXT:    v_and_b32_e32 v1, s2, v0
3501; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
3502; GFX9-NEXT:    v_and_b32_e32 v0, s2, v0
3503; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3504; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3505; GFX9-NEXT:    s_lshl_b32 s2, s2, 1
3506; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3507; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v0, s0
3508; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v1, s1
3509; GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
3510; GFX9-NEXT:    ; return to shader part epilog
3511;
3512; GFX10-LABEL: v_fshr_v2i16_ssv:
3513; GFX10:       ; %bb.0:
3514; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
3515; GFX10-NEXT:    s_mov_b32 s2, 0xf000f
3516; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3517; GFX10-NEXT:    v_and_b32_e32 v0, s2, v0
3518; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3519; GFX10-NEXT:    v_and_b32_e32 v1, s2, v1
3520; GFX10-NEXT:    s_lshl_b32 s2, s3, 1
3521; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3522; GFX10-NEXT:    v_pk_lshrrev_b16 v0, v0, s1
3523; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v1, s0
3524; GFX10-NEXT:    v_or_b32_e32 v0, v1, v0
3525; GFX10-NEXT:    ; return to shader part epilog
3526  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3527  %cast = bitcast <2 x i16> %result to float
3528  ret float %cast
3529}
3530
; <2 x i16> funnel shift right (llvm.fshr.v2i16) with mixed operand placement:
; %lhs and %amt are marked inreg, %rhs is not.
; In the tahiti expected output %rhs is read from v0/v1, %lhs from s0/s1, and
; the amount is packed from s2/s3 with a shift by 16 and an OR. In the other
; runs %rhs is read from v0, %lhs from s0 and the amount from s1.
; The gfx900 and gfx1010 runs do the left-shift half entirely with scalar
; instructions (0xf000f mask, s_pack_ll_b32_b16) and use a single
; v_pk_lshrrev_b16 for %rhs before the final OR into v0.
; The <2 x i16> result is bitcast to float for the return; presumably this
; keeps the amdgpu_ps return value in a VGPR -- TODO confirm.
; The check lines are autogenerated (see the note at the top of the file); do
; not edit them by hand.
3531define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
3532; GFX6-LABEL: v_fshr_v2i16_svs:
3533; GFX6:       ; %bb.0:
3534; GFX6-NEXT:    s_mov_b32 s4, 0xffff
3535; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
3536; GFX6-NEXT:    s_and_b32 s2, s2, s4
3537; GFX6-NEXT:    s_or_b32 s2, s3, s2
3538; GFX6-NEXT:    s_bfe_u32 s3, 1, 0x100000
3539; GFX6-NEXT:    v_and_b32_e32 v2, s4, v0
3540; GFX6-NEXT:    s_lshl_b32 s0, s0, s3
3541; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 15, v2
3542; GFX6-NEXT:    v_and_b32_e32 v3, s4, v1
3543; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
3544; GFX6-NEXT:    s_lshl_b32 s0, s1, s3
3545; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 15, v3
3546; GFX6-NEXT:    v_or_b32_e32 v3, s0, v3
3547; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
3548; GFX6-NEXT:    s_xor_b32 s0, s2, -1
3549; GFX6-NEXT:    s_lshr_b32 s1, s0, 16
3550; GFX6-NEXT:    s_and_b32 s2, s0, 15
3551; GFX6-NEXT:    s_andn2_b32 s0, 15, s0
3552; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
3553; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v0
3554; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3555; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
3556; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3557; GFX6-NEXT:    v_lshrrev_b32_e32 v0, s0, v0
3558; GFX6-NEXT:    s_and_b32 s0, s1, 15
3559; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s2, v2
3560; GFX6-NEXT:    s_andn2_b32 s1, 15, s1
3561; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3562; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
3563; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
3564; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s0, v3
3565; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 1, v1
3566; GFX6-NEXT:    s_bfe_u32 s0, s1, 0x100000
3567; GFX6-NEXT:    v_lshrrev_b32_e32 v1, s0, v1
3568; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
3569; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3570; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3571; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3572; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3573; GFX6-NEXT:    ; return to shader part epilog
3574;
3575; GFX8-LABEL: v_fshr_v2i16_svs:
3576; GFX8:       ; %bb.0:
3577; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
3578; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3579; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
3580; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 15, v0
3581; GFX8-NEXT:    v_mov_b32_e32 v2, 15
3582; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
3583; GFX8-NEXT:    s_lshl_b32 s0, s2, s3
3584; GFX8-NEXT:    v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3585; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
3586; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 1, v0
3587; GFX8-NEXT:    v_mov_b32_e32 v4, 1
3588; GFX8-NEXT:    s_xor_b32 s0, s1, -1
3589; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3590; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
3591; GFX8-NEXT:    s_and_b32 s2, s0, 15
3592; GFX8-NEXT:    s_andn2_b32 s0, 15, s0
3593; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
3594; GFX8-NEXT:    v_lshrrev_b16_e32 v3, s0, v3
3595; GFX8-NEXT:    s_and_b32 s0, s1, 15
3596; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3597; GFX8-NEXT:    v_lshrrev_b16_e32 v0, 1, v0
3598; GFX8-NEXT:    v_lshlrev_b16_e32 v2, s0, v2
3599; GFX8-NEXT:    v_lshrrev_b16_e32 v0, s1, v0
3600; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s2, v1
3601; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
3602; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3603; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
3604; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3605; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3606; GFX8-NEXT:    ; return to shader part epilog
3607;
3608; GFX9-LABEL: v_fshr_v2i16_svs:
3609; GFX9:       ; %bb.0:
3610; GFX9-NEXT:    s_mov_b32 s2, 0xf000f
3611; GFX9-NEXT:    s_and_b32 s3, s1, s2
3612; GFX9-NEXT:    s_andn2_b32 s1, s2, s1
3613; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3614; GFX9-NEXT:    s_lshl_b32 s0, s0, 0x10001
3615; GFX9-NEXT:    s_lshl_b32 s2, s2, 1
3616; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s2
3617; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
3618; GFX9-NEXT:    s_lshr_b32 s4, s1, 16
3619; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
3620; GFX9-NEXT:    s_lshl_b32 s1, s2, s4
3621; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3622; GFX9-NEXT:    v_pk_lshrrev_b16 v0, s3, v0
3623; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3624; GFX9-NEXT:    ; return to shader part epilog
3625;
3626; GFX10-LABEL: v_fshr_v2i16_svs:
3627; GFX10:       ; %bb.0:
3628; GFX10-NEXT:    s_lshr_b32 s3, s0, 16
3629; GFX10-NEXT:    s_mov_b32 s2, 0xf000f
3630; GFX10-NEXT:    s_lshl_b32 s0, s0, 0x10001
3631; GFX10-NEXT:    s_lshl_b32 s3, s3, 1
3632; GFX10-NEXT:    s_and_b32 s4, s1, s2
3633; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s3
3634; GFX10-NEXT:    s_andn2_b32 s1, s2, s1
3635; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
3636; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
3637; GFX10-NEXT:    v_pk_lshrrev_b16 v0, s4, v0
3638; GFX10-NEXT:    s_lshl_b32 s0, s0, s1
3639; GFX10-NEXT:    s_lshl_b32 s1, s2, s3
3640; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3641; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3642; GFX10-NEXT:    ; return to shader part epilog
3643  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3644  %cast = bitcast <2 x i16> %result to float
3645  ret float %cast
3646}
3647
; Funnel shift right of <2 x i16> in an amdgpu_ps shader where %lhs is a plain
; argument while %rhs and %amt are marked inreg (the "vss" suffix presumably
; encodes that operand mix - TODO confirm). The expected code below does the
; lhs work in vector registers and the rhs/amt work in s registers, then ORs
; the two parts together with a vector OR.
3648define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3649; GFX6-LABEL: v_fshr_v2i16_vss:
3650; GFX6:       ; %bb.0:
3651; GFX6-NEXT:    s_mov_b32 s4, 0xffff
3652; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
3653; GFX6-NEXT:    s_and_b32 s2, s2, s4
3654; GFX6-NEXT:    s_or_b32 s2, s3, s2
3655; GFX6-NEXT:    s_bfe_u32 s3, 1, 0x100000
3656; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s3, v0
3657; GFX6-NEXT:    s_and_b32 s5, s0, s4
3658; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s3, v1
3659; GFX6-NEXT:    s_and_b32 s3, s1, s4
3660; GFX6-NEXT:    s_lshr_b32 s5, s5, 15
3661; GFX6-NEXT:    s_lshr_b32 s3, s3, 15
3662; GFX6-NEXT:    s_lshl_b32 s0, s0, 1
3663; GFX6-NEXT:    s_xor_b32 s2, s2, -1
3664; GFX6-NEXT:    v_or_b32_e32 v0, s5, v0
3665; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
3666; GFX6-NEXT:    s_lshr_b32 s3, s2, 16
3667; GFX6-NEXT:    s_and_b32 s5, s2, 15
3668; GFX6-NEXT:    s_andn2_b32 s2, 15, s2
3669; GFX6-NEXT:    s_and_b32 s0, s0, s4
3670; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
3671; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
3672; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3673; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s5, v0
3674; GFX6-NEXT:    s_lshr_b32 s0, s0, s2
3675; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
3676; GFX6-NEXT:    s_and_b32 s0, s3, 15
3677; GFX6-NEXT:    s_lshl_b32 s1, s1, 1
3678; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3679; GFX6-NEXT:    s_andn2_b32 s2, 15, s3
3680; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s0, v1
3681; GFX6-NEXT:    s_and_b32 s0, s1, s4
3682; GFX6-NEXT:    s_lshr_b32 s0, s0, 1
3683; GFX6-NEXT:    s_bfe_u32 s1, s2, 0x100000
3684; GFX6-NEXT:    s_lshr_b32 s0, s0, s1
3685; GFX6-NEXT:    v_or_b32_e32 v1, s0, v1
3686; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 16
3687; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 16
3688; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3689; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
3690; GFX6-NEXT:    ; return to shader part epilog
3691;
3692; GFX8-LABEL: v_fshr_v2i16_vss:
3693; GFX8:       ; %bb.0:
3694; GFX8-NEXT:    s_bfe_u32 s3, s0, 0x100000
3695; GFX8-NEXT:    s_bfe_u32 s4, 15, 0x100000
3696; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
3697; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 1, v0
3698; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
3699; GFX8-NEXT:    v_mov_b32_e32 v2, 1
3700; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
3701; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3702; GFX8-NEXT:    s_lshr_b32 s3, s2, s4
3703; GFX8-NEXT:    v_or_b32_e32 v0, s3, v0
3704; GFX8-NEXT:    s_bfe_u32 s3, 1, 0x100000
3705; GFX8-NEXT:    s_lshl_b32 s0, s0, s3
3706; GFX8-NEXT:    s_xor_b32 s1, s1, -1
3707; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
3708; GFX8-NEXT:    s_and_b32 s5, s1, 15
3709; GFX8-NEXT:    s_andn2_b32 s1, 15, s1
3710; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3711; GFX8-NEXT:    s_lshr_b32 s0, s0, s3
3712; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3713; GFX8-NEXT:    v_lshlrev_b16_e32 v1, s5, v1
3714; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3715; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
3716; GFX8-NEXT:    v_or_b32_e32 v1, s0, v1
3717; GFX8-NEXT:    s_and_b32 s0, s4, 15
3718; GFX8-NEXT:    s_andn2_b32 s1, 15, s4
3719; GFX8-NEXT:    v_lshlrev_b16_e32 v0, s0, v0
3720; GFX8-NEXT:    s_bfe_u32 s0, s2, 0x100000
3721; GFX8-NEXT:    s_lshr_b32 s0, s0, s3
3722; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3723; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
3724; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
3725; GFX8-NEXT:    v_mov_b32_e32 v2, 16
3726; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3727; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3728; GFX8-NEXT:    ; return to shader part epilog
3729;
3730; GFX9-LABEL: v_fshr_v2i16_vss:
3731; GFX9:       ; %bb.0:
3732; GFX9-NEXT:    s_mov_b32 s2, 0xf000f
3733; GFX9-NEXT:    s_and_b32 s3, s1, s2
3734; GFX9-NEXT:    s_andn2_b32 s1, s2, s1
3735; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3736; GFX9-NEXT:    s_mov_b32 s2, 0xffff
3737; GFX9-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
3738; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
3739; GFX9-NEXT:    s_and_b32 s0, s0, s2
3740; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
3741; GFX9-NEXT:    s_and_b32 s2, s3, s2
3742; GFX9-NEXT:    s_lshr_b32 s0, s0, s2
3743; GFX9-NEXT:    s_lshr_b32 s1, s1, s4
3744; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3745; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
3746; GFX9-NEXT:    ; return to shader part epilog
3747;
3748; GFX10-LABEL: v_fshr_v2i16_vss:
3749; GFX10:       ; %bb.0:
3750; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3751; GFX10-NEXT:    s_mov_b32 s2, 0xf000f
3752; GFX10-NEXT:    s_mov_b32 s3, 0xffff
3753; GFX10-NEXT:    s_and_b32 s4, s1, s2
3754; GFX10-NEXT:    s_andn2_b32 s1, s2, s1
3755; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
3756; GFX10-NEXT:    s_and_b32 s0, s0, s3
3757; GFX10-NEXT:    v_pk_lshlrev_b16 v0, s1, v0
3758; GFX10-NEXT:    s_and_b32 s1, s4, s3
3759; GFX10-NEXT:    s_lshr_b32 s3, s4, 16
3760; GFX10-NEXT:    s_lshr_b32 s0, s0, s1
3761; GFX10-NEXT:    s_lshr_b32 s1, s2, s3
3762; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
3763; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
3764; GFX10-NEXT:    ; return to shader part epilog
3765  %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
  ; Return the two 16-bit lanes as one 32-bit float value.
3766  %cast = bitcast <2 x i16> %result to float
3767  ret float %cast
3768}
3769
3770; ; FIXME: The <3 x i16> inreg test below is disabled; enable it once it works.
3771; define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
3772;   %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
3773;   %cast = bitcast <3 x i16> %result to i48
3774;   ret i48 %cast
3775; }
3776
3777; ; FIXME: The <3 x i16> test below is disabled; enable it once it works.
3778; define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
3779;   %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
3780;   %cast.result = bitcast <3 x i16> %result to <3 x half>
3781;   ret <3 x half> %cast.result
3782; }
3783
; Funnel shift right of <4 x i16> with every operand marked inreg in an
; amdgpu_ps shader. The expected code for all four targets consists solely of
; scalar (s_*) instructions. GFX6 and GFX8 handle the four 16-bit elements one
; at a time, while GFX9 and GFX10 split each dword into halves and repack them
; with s_pack_ll_b32_b16.
3784define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
3785; GFX6-LABEL: s_fshr_v4i16:
3786; GFX6:       ; %bb.0:
3787; GFX6-NEXT:    s_mov_b32 s12, 0xffff
3788; GFX6-NEXT:    s_lshl_b32 s9, s9, 16
3789; GFX6-NEXT:    s_and_b32 s8, s8, s12
3790; GFX6-NEXT:    s_or_b32 s8, s9, s8
3791; GFX6-NEXT:    s_lshl_b32 s9, s11, 16
3792; GFX6-NEXT:    s_and_b32 s10, s10, s12
3793; GFX6-NEXT:    s_or_b32 s9, s9, s10
3794; GFX6-NEXT:    s_bfe_u32 s10, 1, 0x100000
3795; GFX6-NEXT:    s_and_b32 s11, s4, s12
3796; GFX6-NEXT:    s_lshl_b32 s0, s0, s10
3797; GFX6-NEXT:    s_lshr_b32 s11, s11, 15
3798; GFX6-NEXT:    s_or_b32 s0, s0, s11
3799; GFX6-NEXT:    s_and_b32 s11, s5, s12
3800; GFX6-NEXT:    s_lshl_b32 s1, s1, s10
3801; GFX6-NEXT:    s_lshr_b32 s11, s11, 15
3802; GFX6-NEXT:    s_lshl_b32 s4, s4, 1
3803; GFX6-NEXT:    s_xor_b32 s8, s8, -1
3804; GFX6-NEXT:    s_or_b32 s1, s1, s11
3805; GFX6-NEXT:    s_lshr_b32 s11, s8, 16
3806; GFX6-NEXT:    s_and_b32 s13, s8, 15
3807; GFX6-NEXT:    s_andn2_b32 s8, 15, s8
3808; GFX6-NEXT:    s_and_b32 s4, s4, s12
3809; GFX6-NEXT:    s_bfe_u32 s13, s13, 0x100000
3810; GFX6-NEXT:    s_lshr_b32 s4, s4, 1
3811; GFX6-NEXT:    s_bfe_u32 s8, s8, 0x100000
3812; GFX6-NEXT:    s_lshl_b32 s0, s0, s13
3813; GFX6-NEXT:    s_lshr_b32 s4, s4, s8
3814; GFX6-NEXT:    s_or_b32 s0, s0, s4
3815; GFX6-NEXT:    s_and_b32 s4, s11, 15
3816; GFX6-NEXT:    s_lshl_b32 s5, s5, 1
3817; GFX6-NEXT:    s_bfe_u32 s4, s4, 0x100000
3818; GFX6-NEXT:    s_andn2_b32 s8, 15, s11
3819; GFX6-NEXT:    s_lshl_b32 s1, s1, s4
3820; GFX6-NEXT:    s_and_b32 s4, s5, s12
3821; GFX6-NEXT:    s_lshr_b32 s4, s4, 1
3822; GFX6-NEXT:    s_bfe_u32 s5, s8, 0x100000
3823; GFX6-NEXT:    s_lshr_b32 s4, s4, s5
3824; GFX6-NEXT:    s_or_b32 s1, s1, s4
3825; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3826; GFX6-NEXT:    s_bfe_u32 s0, s0, 0x100000
3827; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
3828; GFX6-NEXT:    s_or_b32 s0, s0, s1
3829; GFX6-NEXT:    s_lshl_b32 s1, s2, s10
3830; GFX6-NEXT:    s_and_b32 s2, s6, s12
3831; GFX6-NEXT:    s_lshr_b32 s2, s2, 15
3832; GFX6-NEXT:    s_or_b32 s1, s1, s2
3833; GFX6-NEXT:    s_lshl_b32 s2, s3, s10
3834; GFX6-NEXT:    s_and_b32 s3, s7, s12
3835; GFX6-NEXT:    s_lshr_b32 s3, s3, 15
3836; GFX6-NEXT:    s_or_b32 s2, s2, s3
3837; GFX6-NEXT:    s_lshl_b32 s3, s6, 1
3838; GFX6-NEXT:    s_xor_b32 s5, s9, -1
3839; GFX6-NEXT:    s_lshl_b32 s4, s7, 1
3840; GFX6-NEXT:    s_lshr_b32 s6, s5, 16
3841; GFX6-NEXT:    s_and_b32 s7, s5, 15
3842; GFX6-NEXT:    s_andn2_b32 s5, 15, s5
3843; GFX6-NEXT:    s_and_b32 s3, s3, s12
3844; GFX6-NEXT:    s_bfe_u32 s7, s7, 0x100000
3845; GFX6-NEXT:    s_lshr_b32 s3, s3, 1
3846; GFX6-NEXT:    s_bfe_u32 s5, s5, 0x100000
3847; GFX6-NEXT:    s_lshl_b32 s1, s1, s7
3848; GFX6-NEXT:    s_lshr_b32 s3, s3, s5
3849; GFX6-NEXT:    s_or_b32 s1, s1, s3
3850; GFX6-NEXT:    s_and_b32 s3, s6, 15
3851; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x100000
3852; GFX6-NEXT:    s_andn2_b32 s5, 15, s6
3853; GFX6-NEXT:    s_lshl_b32 s2, s2, s3
3854; GFX6-NEXT:    s_and_b32 s3, s4, s12
3855; GFX6-NEXT:    s_lshr_b32 s3, s3, 1
3856; GFX6-NEXT:    s_bfe_u32 s4, s5, 0x100000
3857; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
3858; GFX6-NEXT:    s_or_b32 s2, s2, s3
3859; GFX6-NEXT:    s_bfe_u32 s2, s2, 0x100000
3860; GFX6-NEXT:    s_bfe_u32 s1, s1, 0x100000
3861; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
3862; GFX6-NEXT:    s_or_b32 s1, s1, s2
3863; GFX6-NEXT:    ; return to shader part epilog
3864;
3865; GFX8-LABEL: s_fshr_v4i16:
3866; GFX8:       ; %bb.0:
3867; GFX8-NEXT:    s_bfe_u32 s8, 1, 0x100000
3868; GFX8-NEXT:    s_bfe_u32 s9, s2, 0x100000
3869; GFX8-NEXT:    s_bfe_u32 s10, 15, 0x100000
3870; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
3871; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
3872; GFX8-NEXT:    s_lshl_b32 s0, s0, s8
3873; GFX8-NEXT:    s_lshr_b32 s9, s9, s10
3874; GFX8-NEXT:    s_or_b32 s0, s0, s9
3875; GFX8-NEXT:    s_lshl_b32 s6, s6, s8
3876; GFX8-NEXT:    s_lshr_b32 s9, s7, s10
3877; GFX8-NEXT:    s_lshl_b32 s2, s2, s8
3878; GFX8-NEXT:    s_xor_b32 s4, s4, -1
3879; GFX8-NEXT:    s_or_b32 s6, s6, s9
3880; GFX8-NEXT:    s_lshr_b32 s9, s4, 16
3881; GFX8-NEXT:    s_and_b32 s11, s4, 15
3882; GFX8-NEXT:    s_andn2_b32 s4, 15, s4
3883; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3884; GFX8-NEXT:    s_bfe_u32 s11, s11, 0x100000
3885; GFX8-NEXT:    s_lshr_b32 s2, s2, s8
3886; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
3887; GFX8-NEXT:    s_lshl_b32 s0, s0, s11
3888; GFX8-NEXT:    s_lshr_b32 s2, s2, s4
3889; GFX8-NEXT:    s_or_b32 s0, s0, s2
3890; GFX8-NEXT:    s_and_b32 s2, s9, 15
3891; GFX8-NEXT:    s_lshl_b32 s7, s7, s8
3892; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3893; GFX8-NEXT:    s_andn2_b32 s4, 15, s9
3894; GFX8-NEXT:    s_lshl_b32 s2, s6, s2
3895; GFX8-NEXT:    s_bfe_u32 s6, s7, 0x100000
3896; GFX8-NEXT:    s_lshr_b32 s6, s6, s8
3897; GFX8-NEXT:    s_bfe_u32 s4, s4, 0x100000
3898; GFX8-NEXT:    s_lshr_b32 s4, s6, s4
3899; GFX8-NEXT:    s_or_b32 s2, s2, s4
3900; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3901; GFX8-NEXT:    s_bfe_u32 s0, s0, 0x100000
3902; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
3903; GFX8-NEXT:    s_bfe_u32 s6, s3, 0x100000
3904; GFX8-NEXT:    s_or_b32 s0, s0, s2
3905; GFX8-NEXT:    s_lshr_b32 s2, s1, 16
3906; GFX8-NEXT:    s_lshr_b32 s4, s3, 16
3907; GFX8-NEXT:    s_lshl_b32 s1, s1, s8
3908; GFX8-NEXT:    s_lshr_b32 s6, s6, s10
3909; GFX8-NEXT:    s_or_b32 s1, s1, s6
3910; GFX8-NEXT:    s_lshl_b32 s2, s2, s8
3911; GFX8-NEXT:    s_lshr_b32 s6, s4, s10
3912; GFX8-NEXT:    s_lshl_b32 s3, s3, s8
3913; GFX8-NEXT:    s_xor_b32 s5, s5, -1
3914; GFX8-NEXT:    s_or_b32 s2, s2, s6
3915; GFX8-NEXT:    s_lshr_b32 s6, s5, 16
3916; GFX8-NEXT:    s_and_b32 s7, s5, 15
3917; GFX8-NEXT:    s_andn2_b32 s5, 15, s5
3918; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
3919; GFX8-NEXT:    s_bfe_u32 s7, s7, 0x100000
3920; GFX8-NEXT:    s_lshr_b32 s3, s3, s8
3921; GFX8-NEXT:    s_bfe_u32 s5, s5, 0x100000
3922; GFX8-NEXT:    s_lshl_b32 s1, s1, s7
3923; GFX8-NEXT:    s_lshr_b32 s3, s3, s5
3924; GFX8-NEXT:    s_or_b32 s1, s1, s3
3925; GFX8-NEXT:    s_and_b32 s3, s6, 15
3926; GFX8-NEXT:    s_lshl_b32 s4, s4, s8
3927; GFX8-NEXT:    s_bfe_u32 s3, s3, 0x100000
3928; GFX8-NEXT:    s_andn2_b32 s5, 15, s6
3929; GFX8-NEXT:    s_lshl_b32 s2, s2, s3
3930; GFX8-NEXT:    s_bfe_u32 s3, s4, 0x100000
3931; GFX8-NEXT:    s_lshr_b32 s3, s3, s8
3932; GFX8-NEXT:    s_bfe_u32 s4, s5, 0x100000
3933; GFX8-NEXT:    s_lshr_b32 s3, s3, s4
3934; GFX8-NEXT:    s_or_b32 s2, s2, s3
3935; GFX8-NEXT:    s_bfe_u32 s2, s2, 0x100000
3936; GFX8-NEXT:    s_bfe_u32 s1, s1, 0x100000
3937; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
3938; GFX8-NEXT:    s_or_b32 s1, s1, s2
3939; GFX8-NEXT:    ; return to shader part epilog
3940;
3941; GFX9-LABEL: s_fshr_v4i16:
3942; GFX9:       ; %bb.0:
3943; GFX9-NEXT:    s_mov_b32 s8, 0x10001
3944; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
3945; GFX9-NEXT:    s_mov_b32 s6, 0xf000f
3946; GFX9-NEXT:    s_lshl_b32 s0, s0, s8
3947; GFX9-NEXT:    s_lshl_b32 s9, s9, 1
3948; GFX9-NEXT:    s_and_b32 s7, s4, s6
3949; GFX9-NEXT:    s_andn2_b32 s4, s6, s4
3950; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s9
3951; GFX9-NEXT:    s_lshr_b32 s9, s0, 16
3952; GFX9-NEXT:    s_lshr_b32 s10, s4, 16
3953; GFX9-NEXT:    s_lshl_b32 s0, s0, s4
3954; GFX9-NEXT:    s_lshl_b32 s4, s9, s10
3955; GFX9-NEXT:    s_mov_b32 s9, 0xffff
3956; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
3957; GFX9-NEXT:    s_lshr_b32 s4, s2, 16
3958; GFX9-NEXT:    s_and_b32 s2, s2, s9
3959; GFX9-NEXT:    s_lshr_b32 s10, s7, 16
3960; GFX9-NEXT:    s_and_b32 s7, s7, s9
3961; GFX9-NEXT:    s_lshr_b32 s2, s2, s7
3962; GFX9-NEXT:    s_lshr_b32 s4, s4, s10
3963; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s4
3964; GFX9-NEXT:    s_or_b32 s0, s0, s2
3965; GFX9-NEXT:    s_and_b32 s2, s5, s6
3966; GFX9-NEXT:    s_andn2_b32 s4, s6, s5
3967; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
3968; GFX9-NEXT:    s_lshl_b32 s1, s1, s8
3969; GFX9-NEXT:    s_lshl_b32 s5, s5, 1
3970; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s5
3971; GFX9-NEXT:    s_lshr_b32 s5, s1, 16
3972; GFX9-NEXT:    s_lshr_b32 s6, s4, 16
3973; GFX9-NEXT:    s_lshl_b32 s1, s1, s4
3974; GFX9-NEXT:    s_lshl_b32 s4, s5, s6
3975; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
3976; GFX9-NEXT:    s_lshr_b32 s4, s3, 16
3977; GFX9-NEXT:    s_and_b32 s3, s3, s9
3978; GFX9-NEXT:    s_lshr_b32 s5, s2, 16
3979; GFX9-NEXT:    s_and_b32 s2, s2, s9
3980; GFX9-NEXT:    s_lshr_b32 s2, s3, s2
3981; GFX9-NEXT:    s_lshr_b32 s3, s4, s5
3982; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
3983; GFX9-NEXT:    s_or_b32 s1, s1, s2
3984; GFX9-NEXT:    ; return to shader part epilog
3985;
3986; GFX10-LABEL: s_fshr_v4i16:
3987; GFX10:       ; %bb.0:
3988; GFX10-NEXT:    s_mov_b32 s7, 0x10001
3989; GFX10-NEXT:    s_lshr_b32 s8, s0, 16
3990; GFX10-NEXT:    s_mov_b32 s6, 0xf000f
3991; GFX10-NEXT:    s_lshl_b32 s0, s0, s7
3992; GFX10-NEXT:    s_lshl_b32 s8, s8, 1
3993; GFX10-NEXT:    s_and_b32 s9, s4, s6
3994; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s8
3995; GFX10-NEXT:    s_andn2_b32 s4, s6, s4
3996; GFX10-NEXT:    s_lshr_b32 s8, s0, 16
3997; GFX10-NEXT:    s_lshr_b32 s10, s4, 16
3998; GFX10-NEXT:    s_lshl_b32 s0, s0, s4
3999; GFX10-NEXT:    s_lshl_b32 s4, s8, s10
4000; GFX10-NEXT:    s_mov_b32 s8, 0xffff
4001; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s4
4002; GFX10-NEXT:    s_lshr_b32 s4, s1, 16
4003; GFX10-NEXT:    s_lshl_b32 s1, s1, s7
4004; GFX10-NEXT:    s_lshl_b32 s4, s4, 1
4005; GFX10-NEXT:    s_and_b32 s7, s5, s6
4006; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4007; GFX10-NEXT:    s_andn2_b32 s4, s6, s5
4008; GFX10-NEXT:    s_lshr_b32 s5, s1, 16
4009; GFX10-NEXT:    s_lshr_b32 s6, s4, 16
4010; GFX10-NEXT:    s_lshr_b32 s10, s2, 16
4011; GFX10-NEXT:    s_and_b32 s2, s2, s8
4012; GFX10-NEXT:    s_and_b32 s11, s9, s8
4013; GFX10-NEXT:    s_lshr_b32 s9, s9, 16
4014; GFX10-NEXT:    s_lshl_b32 s1, s1, s4
4015; GFX10-NEXT:    s_lshl_b32 s4, s5, s6
4016; GFX10-NEXT:    s_lshr_b32 s5, s3, 16
4017; GFX10-NEXT:    s_and_b32 s3, s3, s8
4018; GFX10-NEXT:    s_and_b32 s6, s7, s8
4019; GFX10-NEXT:    s_lshr_b32 s7, s7, 16
4020; GFX10-NEXT:    s_lshr_b32 s2, s2, s11
4021; GFX10-NEXT:    s_lshr_b32 s9, s10, s9
4022; GFX10-NEXT:    s_lshr_b32 s3, s3, s6
4023; GFX10-NEXT:    s_lshr_b32 s5, s5, s7
4024; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, s9
4025; GFX10-NEXT:    s_pack_ll_b32_b16 s1, s1, s4
4026; GFX10-NEXT:    s_pack_ll_b32_b16 s3, s3, s5
4027; GFX10-NEXT:    s_or_b32 s0, s0, s2
4028; GFX10-NEXT:    s_or_b32 s1, s1, s3
4029; GFX10-NEXT:    ; return to shader part epilog
4030  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
  ; Return the four 16-bit lanes as two 32-bit integers.
4031  %cast.result = bitcast <4 x i16> %result to <2 x i32>
4032  ret <2 x i32> %cast.result
4033}
4034
; Funnel shift right of <4 x i16> using the default calling convention with no
; inreg operands. GFX9 and GFX10 are expected to use packed 16-bit shifts
; (v_pk_lshlrev_b16 and v_pk_lshrrev_b16), while GFX6 and GFX8 are expected to
; shift each 16-bit element separately.
4035define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
4036; GFX6-LABEL: v_fshr_v4i16:
4037; GFX6:       ; %bb.0:
4038; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4039; GFX6-NEXT:    v_mov_b32_e32 v12, 0xffff
4040; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
4041; GFX6-NEXT:    v_and_b32_e32 v8, v8, v12
4042; GFX6-NEXT:    v_or_b32_e32 v8, v9, v8
4043; GFX6-NEXT:    v_lshlrev_b32_e32 v9, 16, v11
4044; GFX6-NEXT:    v_and_b32_e32 v10, v10, v12
4045; GFX6-NEXT:    s_mov_b32 s5, 0xffff
4046; GFX6-NEXT:    v_or_b32_e32 v9, v9, v10
4047; GFX6-NEXT:    s_bfe_u32 s4, 1, 0x100000
4048; GFX6-NEXT:    v_and_b32_e32 v10, s5, v4
4049; GFX6-NEXT:    v_lshlrev_b32_e32 v0, s4, v0
4050; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 15, v10
4051; GFX6-NEXT:    v_or_b32_e32 v0, v0, v10
4052; GFX6-NEXT:    v_and_b32_e32 v10, s5, v5
4053; GFX6-NEXT:    v_lshlrev_b32_e32 v1, s4, v1
4054; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 15, v10
4055; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4056; GFX6-NEXT:    v_or_b32_e32 v1, v1, v10
4057; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v4
4058; GFX6-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
4059; GFX6-NEXT:    v_and_b32_e32 v11, 15, v8
4060; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4061; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
4062; GFX6-NEXT:    v_and_b32_e32 v4, v4, v12
4063; GFX6-NEXT:    v_bfe_u32 v11, v11, 0, 16
4064; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 1, v4
4065; GFX6-NEXT:    v_bfe_u32 v8, v8, 0, 16
4066; GFX6-NEXT:    v_lshlrev_b32_e32 v0, v11, v0
4067; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v8, v4
4068; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
4069; GFX6-NEXT:    v_and_b32_e32 v4, 15, v10
4070; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v5
4071; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
4072; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
4073; GFX6-NEXT:    v_and_b32_e32 v8, 15, v8
4074; GFX6-NEXT:    v_lshlrev_b32_e32 v1, v4, v1
4075; GFX6-NEXT:    v_and_b32_e32 v4, v5, v12
4076; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 1, v4
4077; GFX6-NEXT:    v_bfe_u32 v5, v8, 0, 16
4078; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
4079; GFX6-NEXT:    v_or_b32_e32 v1, v1, v4
4080; GFX6-NEXT:    v_and_b32_e32 v4, v6, v12
4081; GFX6-NEXT:    v_lshlrev_b32_e32 v2, s4, v2
4082; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 15, v4
4083; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
4084; GFX6-NEXT:    v_and_b32_e32 v4, v7, v12
4085; GFX6-NEXT:    v_lshlrev_b32_e32 v3, s4, v3
4086; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 15, v4
4087; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
4088; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 1, v6
4089; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v9
4090; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 1, v7
4091; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
4092; GFX6-NEXT:    v_and_b32_e32 v8, 15, v6
4093; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v6
4094; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4095; GFX6-NEXT:    v_and_b32_e32 v4, v4, v12
4096; GFX6-NEXT:    v_bfe_u32 v8, v8, 0, 16
4097; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 1, v4
4098; GFX6-NEXT:    v_bfe_u32 v6, v6, 0, 16
4099; GFX6-NEXT:    v_lshlrev_b32_e32 v2, v8, v2
4100; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v6, v4
4101; GFX6-NEXT:    v_or_b32_e32 v2, v2, v4
4102; GFX6-NEXT:    v_and_b32_e32 v4, 15, v7
4103; GFX6-NEXT:    v_xor_b32_e32 v6, -1, v7
4104; GFX6-NEXT:    v_bfe_u32 v4, v4, 0, 16
4105; GFX6-NEXT:    v_and_b32_e32 v6, 15, v6
4106; GFX6-NEXT:    v_lshlrev_b32_e32 v3, v4, v3
4107; GFX6-NEXT:    v_and_b32_e32 v4, v5, v12
4108; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 1, v4
4109; GFX6-NEXT:    v_bfe_u32 v5, v6, 0, 16
4110; GFX6-NEXT:    v_lshrrev_b32_e32 v4, v5, v4
4111; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
4112; GFX6-NEXT:    s_setpc_b64 s[30:31]
4113;
4114; GFX8-LABEL: v_fshr_v4i16:
4115; GFX8:       ; %bb.0:
4116; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4117; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 1, v0
4118; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 15, v2
4119; GFX8-NEXT:    v_or_b32_e32 v6, v6, v7
4120; GFX8-NEXT:    v_mov_b32_e32 v7, 1
4121; GFX8-NEXT:    v_mov_b32_e32 v8, 15
4122; GFX8-NEXT:    v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4123; GFX8-NEXT:    v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4124; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4125; GFX8-NEXT:    v_or_b32_e32 v0, v0, v9
4126; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 1, v2
4127; GFX8-NEXT:    v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4128; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
4129; GFX8-NEXT:    v_and_b32_e32 v10, 15, v4
4130; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4131; GFX8-NEXT:    v_and_b32_e32 v4, 15, v4
4132; GFX8-NEXT:    v_lshrrev_b16_e32 v9, 1, v9
4133; GFX8-NEXT:    v_lshlrev_b16_e32 v6, v10, v6
4134; GFX8-NEXT:    v_lshrrev_b16_e32 v4, v4, v9
4135; GFX8-NEXT:    v_or_b32_e32 v4, v6, v4
4136; GFX8-NEXT:    v_and_b32_e32 v6, 15, v7
4137; GFX8-NEXT:    v_xor_b32_e32 v7, -1, v7
4138; GFX8-NEXT:    v_and_b32_e32 v7, 15, v7
4139; GFX8-NEXT:    v_lshrrev_b16_e32 v2, 1, v2
4140; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v6, v0
4141; GFX8-NEXT:    v_lshrrev_b16_e32 v2, v7, v2
4142; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4143; GFX8-NEXT:    v_mov_b32_e32 v2, 16
4144; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4145; GFX8-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4146; GFX8-NEXT:    v_lshlrev_b16_e32 v4, 1, v1
4147; GFX8-NEXT:    v_lshrrev_b16_e32 v6, 15, v3
4148; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
4149; GFX8-NEXT:    v_mov_b32_e32 v6, 1
4150; GFX8-NEXT:    v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4151; GFX8-NEXT:    v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4152; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
4153; GFX8-NEXT:    v_or_b32_e32 v1, v1, v7
4154; GFX8-NEXT:    v_lshlrev_b16_e32 v7, 1, v3
4155; GFX8-NEXT:    v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4156; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
4157; GFX8-NEXT:    v_and_b32_e32 v8, 15, v5
4158; GFX8-NEXT:    v_xor_b32_e32 v5, -1, v5
4159; GFX8-NEXT:    v_and_b32_e32 v5, 15, v5
4160; GFX8-NEXT:    v_lshrrev_b16_e32 v7, 1, v7
4161; GFX8-NEXT:    v_lshlrev_b16_e32 v4, v8, v4
4162; GFX8-NEXT:    v_lshrrev_b16_e32 v5, v5, v7
4163; GFX8-NEXT:    v_or_b32_e32 v4, v4, v5
4164; GFX8-NEXT:    v_and_b32_e32 v5, 15, v6
4165; GFX8-NEXT:    v_xor_b32_e32 v6, -1, v6
4166; GFX8-NEXT:    v_and_b32_e32 v6, 15, v6
4167; GFX8-NEXT:    v_lshrrev_b16_e32 v3, 1, v3
4168; GFX8-NEXT:    v_lshlrev_b16_e32 v1, v5, v1
4169; GFX8-NEXT:    v_lshrrev_b16_e32 v3, v6, v3
4170; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4171; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
4172; GFX8-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4173; GFX8-NEXT:    s_setpc_b64 s[30:31]
4174;
4175; GFX9-LABEL: v_fshr_v4i16:
4176; GFX9:       ; %bb.0:
4177; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4178; GFX9-NEXT:    s_mov_b32 s4, 0xf000f
4179; GFX9-NEXT:    v_and_b32_e32 v6, s4, v4
4180; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4181; GFX9-NEXT:    v_and_b32_e32 v4, s4, v4
4182; GFX9-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4183; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v4, v0
4184; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v6, v2
4185; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v5
4186; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4187; GFX9-NEXT:    v_and_b32_e32 v2, s4, v5
4188; GFX9-NEXT:    v_and_b32_e32 v4, s4, v4
4189; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4190; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v4, v1
4191; GFX9-NEXT:    v_pk_lshrrev_b16 v2, v2, v3
4192; GFX9-NEXT:    v_or_b32_e32 v1, v1, v2
4193; GFX9-NEXT:    s_setpc_b64 s[30:31]
4194;
4195; GFX10-LABEL: v_fshr_v4i16:
4196; GFX10:       ; %bb.0:
4197; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4198; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4199; GFX10-NEXT:    v_xor_b32_e32 v6, -1, v4
4200; GFX10-NEXT:    v_xor_b32_e32 v7, -1, v5
4201; GFX10-NEXT:    s_mov_b32 s4, 0xf000f
4202; GFX10-NEXT:    v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4203; GFX10-NEXT:    v_and_b32_e32 v4, s4, v4
4204; GFX10-NEXT:    v_and_b32_e32 v6, s4, v6
4205; GFX10-NEXT:    v_and_b32_e32 v5, s4, v5
4206; GFX10-NEXT:    v_and_b32_e32 v7, s4, v7
4207; GFX10-NEXT:    v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4208; GFX10-NEXT:    v_pk_lshrrev_b16 v2, v4, v2
4209; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v6, v0
4210; GFX10-NEXT:    v_pk_lshrrev_b16 v3, v5, v3
4211; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v7, v1
4212; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4213; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4214; GFX10-NEXT:    s_setpc_b64 s[30:31]
4215  %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
  ; Reinterpret the integer lanes as halves to match the declared return type.
4216  %cast.result = bitcast <4 x i16> %result to <4 x half>
4217  ret <4 x half> %cast.result
4218}
4219
; Funnel shift right of i64 by a variable amount with every operand marked
; inreg in an amdgpu_ps shader. All four targets are expected to mask the
; amount with 63 (s_and_b64), mask its complement with 63 (s_andn2_b64),
; pre-shift lhs left by 1, and combine the 64-bit left and right shifts with
; s_or_b64. Only the instruction order and scratch registers differ on GFX10.
4220define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
4221; GFX6-LABEL: s_fshr_i64:
4222; GFX6:       ; %bb.0:
4223; GFX6-NEXT:    s_and_b64 s[6:7], s[4:5], 63
4224; GFX6-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
4225; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4226; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
4227; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
4228; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4229; GFX6-NEXT:    ; return to shader part epilog
4230;
4231; GFX8-LABEL: s_fshr_i64:
4232; GFX8:       ; %bb.0:
4233; GFX8-NEXT:    s_and_b64 s[6:7], s[4:5], 63
4234; GFX8-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
4235; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4236; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
4237; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
4238; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4239; GFX8-NEXT:    ; return to shader part epilog
4240;
4241; GFX9-LABEL: s_fshr_i64:
4242; GFX9:       ; %bb.0:
4243; GFX9-NEXT:    s_and_b64 s[6:7], s[4:5], 63
4244; GFX9-NEXT:    s_andn2_b64 s[4:5], 63, s[4:5]
4245; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4246; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
4247; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s6
4248; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4249; GFX9-NEXT:    ; return to shader part epilog
4250;
4251; GFX10-LABEL: s_fshr_i64:
4252; GFX10:       ; %bb.0:
4253; GFX10-NEXT:    s_andn2_b64 s[6:7], 63, s[4:5]
4254; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4255; GFX10-NEXT:    s_and_b64 s[4:5], s[4:5], 63
4256; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s6
4257; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s4
4258; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4259; GFX10-NEXT:    ; return to shader part epilog
4260  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4261  ret i64 %result
4262}
4263
; Funnel shift right of i64 by the constant 5 with inreg operands. A single set
; of checks under the common GCN prefix covers every RUN line. The lhs
; contribution is expected as a 32-bit left shift by 27 written to s1 with s0
; zeroed, which is then ORed with rhs shifted right by 5.
4264define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
4265; GCN-LABEL: s_fshr_i64_5:
4266; GCN:       ; %bb.0:
4267; GCN-NEXT:    s_lshl_b32 s1, s0, 27
4268; GCN-NEXT:    s_mov_b32 s0, 0
4269; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], 5
4270; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4271; GCN-NEXT:    ; return to shader part epilog
4272  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
4273  ret i64 %result
4274}
4275
; Funnel shift right of i64 by the constant 32 with inreg operands. A single
; set of checks under the common GCN prefix covers every RUN line. The
; expected code contains no shift instructions, only s_mov_b32 register moves
; followed by an s_or_b64.
4276define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
4277; GCN-LABEL: s_fshr_i64_32:
4278; GCN:       ; %bb.0:
4279; GCN-NEXT:    s_mov_b32 s1, s0
4280; GCN-NEXT:    s_mov_b32 s0, 0
4281; GCN-NEXT:    s_mov_b32 s2, s3
4282; GCN-NEXT:    s_mov_b32 s3, s0
4283; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4284; GCN-NEXT:    ; return to shader part epilog
4285  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
4286  ret i64 %result
4287}
4288
; Funnel shift right of i64 by the constant 48 with inreg operands. A single
; set of checks under the common GCN prefix covers every RUN line. The
; expected code shifts s[0:1] left by 16 as a 64-bit value and shifts only s3
; right by 16 (with the upper result dword zeroed) before the s_or_b64.
4289define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
4290; GCN-LABEL: s_fshr_i64_48:
4291; GCN:       ; %bb.0:
4292; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], 16
4293; GCN-NEXT:    s_lshr_b32 s2, s3, 16
4294; GCN-NEXT:    s_mov_b32 s3, 0
4295; GCN-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
4296; GCN-NEXT:    ; return to shader part epilog
4297  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
4298  ret i64 %result
4299}
4300
; Funnel shift right of i64 by a variable amount using the default calling
; convention with no inreg operands. The expected code masks amt and its
; bitwise complement with 63, shifts lhs left by 1 and then by the masked
; complement, shifts rhs right by the masked amount, and ORs the low and high
; dwords separately with v_or_b32.
4301define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) {
4302; GFX6-LABEL: v_fshr_i64:
4303; GFX6:       ; %bb.0:
4304; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4305; GFX6-NEXT:    v_and_b32_e32 v5, 63, v4
4306; GFX6-NEXT:    v_xor_b32_e32 v4, -1, v4
4307; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
4308; GFX6-NEXT:    v_and_b32_e32 v4, 63, v4
4309; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
4310; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], v5
4311; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4312; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
4313; GFX6-NEXT:    s_setpc_b64 s[30:31]
4314;
4315; GFX8-LABEL: v_fshr_i64:
4316; GFX8:       ; %bb.0:
4317; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4318; GFX8-NEXT:    v_and_b32_e32 v5, 63, v4
4319; GFX8-NEXT:    v_xor_b32_e32 v4, -1, v4
4320; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4321; GFX8-NEXT:    v_and_b32_e32 v4, 63, v4
4322; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
4323; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
4324; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4325; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4326; GFX8-NEXT:    s_setpc_b64 s[30:31]
4327;
4328; GFX9-LABEL: v_fshr_i64:
4329; GFX9:       ; %bb.0:
4330; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4331; GFX9-NEXT:    v_and_b32_e32 v5, 63, v4
4332; GFX9-NEXT:    v_xor_b32_e32 v4, -1, v4
4333; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4334; GFX9-NEXT:    v_and_b32_e32 v4, 63, v4
4335; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
4336; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v5, v[2:3]
4337; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4338; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
4339; GFX9-NEXT:    s_setpc_b64 s[30:31]
4340;
4341; GFX10-LABEL: v_fshr_i64:
4342; GFX10:       ; %bb.0:
4343; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4344; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4345; GFX10-NEXT:    v_xor_b32_e32 v5, -1, v4
4346; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4347; GFX10-NEXT:    v_and_b32_e32 v4, 63, v4
4348; GFX10-NEXT:    v_and_b32_e32 v5, 63, v5
4349; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v4, v[2:3]
4350; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v5, v[0:1]
4351; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
4352; GFX10-NEXT:    v_or_b32_e32 v1, v1, v3
4353; GFX10-NEXT:    s_setpc_b64 s[30:31]
4354  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4355  ret i64 %result
4356}
4357
; Funnel shift right of i64 by the constant 5 using the default calling
; convention. The expected code is a 64-bit right shift of v[2:3] by 5 plus a
; 32-bit left shift of the saved v0 by 27 that is ORed into the high result
; dword (v1). GFX6 spells the 64-bit shift v_lshr_b64, while the other targets
; use v_lshrrev_b64.
4358define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) {
4359; GFX6-LABEL: v_fshr_i64_5:
4360; GFX6:       ; %bb.0:
4361; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4362; GFX6-NEXT:    v_mov_b32_e32 v4, v0
4363; GFX6-NEXT:    v_lshr_b64 v[0:1], v[2:3], 5
4364; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4365; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
4366; GFX6-NEXT:    s_setpc_b64 s[30:31]
4367;
4368; GFX8-LABEL: v_fshr_i64_5:
4369; GFX8:       ; %bb.0:
4370; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4371; GFX8-NEXT:    v_mov_b32_e32 v4, v0
4372; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
4373; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4374; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
4375; GFX8-NEXT:    s_setpc_b64 s[30:31]
4376;
4377; GFX9-LABEL: v_fshr_i64_5:
4378; GFX9:       ; %bb.0:
4379; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4380; GFX9-NEXT:    v_mov_b32_e32 v4, v0
4381; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
4382; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4383; GFX9-NEXT:    v_or_b32_e32 v1, v2, v1
4384; GFX9-NEXT:    s_setpc_b64 s[30:31]
4385;
4386; GFX10-LABEL: v_fshr_i64_5:
4387; GFX10:       ; %bb.0:
4388; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4389; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4390; GFX10-NEXT:    v_mov_b32_e32 v4, v0
4391; GFX10-NEXT:    v_lshrrev_b64 v[0:1], 5, v[2:3]
4392; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 27, v4
4393; GFX10-NEXT:    v_or_b32_e32 v1, v2, v1
4394; GFX10-NEXT:    s_setpc_b64 s[30:31]
4395  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
4396  ret i64 %result
4397}
4398
; Funnel shift right of i64 by the constant 32 using the default calling
; convention. Apart from the entry waitcnt instructions, the expected code on
; every target is just two register moves (v1 = v0, then v0 = v3) before the
; return, with no shift or OR instructions.
4399define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
4400; GFX6-LABEL: v_fshr_i64_32:
4401; GFX6:       ; %bb.0:
4402; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4403; GFX6-NEXT:    v_mov_b32_e32 v1, v0
4404; GFX6-NEXT:    v_mov_b32_e32 v0, v3
4405; GFX6-NEXT:    s_setpc_b64 s[30:31]
4406;
4407; GFX8-LABEL: v_fshr_i64_32:
4408; GFX8:       ; %bb.0:
4409; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4410; GFX8-NEXT:    v_mov_b32_e32 v1, v0
4411; GFX8-NEXT:    v_mov_b32_e32 v0, v3
4412; GFX8-NEXT:    s_setpc_b64 s[30:31]
4413;
4414; GFX9-LABEL: v_fshr_i64_32:
4415; GFX9:       ; %bb.0:
4416; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4417; GFX9-NEXT:    v_mov_b32_e32 v1, v0
4418; GFX9-NEXT:    v_mov_b32_e32 v0, v3
4419; GFX9-NEXT:    s_setpc_b64 s[30:31]
4420;
4421; GFX10-LABEL: v_fshr_i64_32:
4422; GFX10:       ; %bb.0:
4423; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4424; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4425; GFX10-NEXT:    v_mov_b32_e32 v1, v0
4426; GFX10-NEXT:    v_mov_b32_e32 v0, v3
4427; GFX10-NEXT:    s_setpc_b64 s[30:31]
4428  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
4429  ret i64 %result
4430}
4431
; Test: llvm.fshr.i64 with a constant shift amount of 48, default calling
; convention (no inreg arguments).
; Every check prefix expects a 64-bit left shift of v[0:1] by 16, followed by
; an OR of the upper 16 bits of v3 into v0.
; The GFX6 checks extract those 16 bits with a separate v_lshrrev_b32 before a
; plain v_or_b32. The GFX8, GFX9 and GFX10 checks instead use a single
; v_or_b32_sdwa whose src1_sel:WORD_1 selects the upper word of v3 directly.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4432define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) {
4433; GFX6-LABEL: v_fshr_i64_48:
4434; GFX6:       ; %bb.0:
4435; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4436; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 16
4437; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v3
4438; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4439; GFX6-NEXT:    s_setpc_b64 s[30:31]
4440;
4441; GFX8-LABEL: v_fshr_i64_48:
4442; GFX8:       ; %bb.0:
4443; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4444; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
4445; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4446; GFX8-NEXT:    s_setpc_b64 s[30:31]
4447;
4448; GFX9-LABEL: v_fshr_i64_48:
4449; GFX9:       ; %bb.0:
4450; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4451; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
4452; GFX9-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4453; GFX9-NEXT:    s_setpc_b64 s[30:31]
4454;
4455; GFX10-LABEL: v_fshr_i64_48:
4456; GFX10:       ; %bb.0:
4457; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4458; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4459; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 16, v[0:1]
4460; GFX10-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4461; GFX10-NEXT:    s_setpc_b64 s[30:31]
4462  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
4463  ret i64 %result
4464}
4465
; Test: llvm.fshr.i64 in an amdgpu_ps function where %lhs and %rhs are inreg
; and %amt is not (the "ssv" suffix follows that lhs/rhs/amt order).
; Expected sequence for every check prefix:
;   - v0 (amt) is masked with 63 for the right-shift amount, and its bitwise
;     complement is masked with 63 for the left-shift amount;
;   - s[0:1] is first shifted left by 1 with a scalar s_lshl_b64;
;   - s[0:1] is then shifted left and s[2:3] shifted right with VALU 64-bit
;     shifts, and the two results are ORed dword by dword into v0/v1.
; The GFX10 checks use a different temporary register assignment and
; instruction order than the other three, but the same operations.
; The i64 result is bitcast to <2 x float> before being returned.
; NOTE(review): the bitcast presumably exists so the shader result is returned
; in VGPRs; confirm if relied upon.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4466define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
4467; GFX6-LABEL: v_fshr_i64_ssv:
4468; GFX6:       ; %bb.0:
4469; GFX6-NEXT:    v_and_b32_e32 v2, 63, v0
4470; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
4471; GFX6-NEXT:    v_and_b32_e32 v0, 63, v0
4472; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4473; GFX6-NEXT:    v_lshl_b64 v[0:1], s[0:1], v0
4474; GFX6-NEXT:    v_lshr_b64 v[2:3], s[2:3], v2
4475; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
4476; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
4477; GFX6-NEXT:    ; return to shader part epilog
4478;
4479; GFX8-LABEL: v_fshr_i64_ssv:
4480; GFX8:       ; %bb.0:
4481; GFX8-NEXT:    v_and_b32_e32 v2, 63, v0
4482; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
4483; GFX8-NEXT:    v_and_b32_e32 v0, 63, v0
4484; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4485; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
4486; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
4487; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
4488; GFX8-NEXT:    v_or_b32_e32 v1, v1, v3
4489; GFX8-NEXT:    ; return to shader part epilog
4490;
4491; GFX9-LABEL: v_fshr_i64_ssv:
4492; GFX9:       ; %bb.0:
4493; GFX9-NEXT:    v_and_b32_e32 v2, 63, v0
4494; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
4495; GFX9-NEXT:    v_and_b32_e32 v0, 63, v0
4496; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4497; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v0, s[0:1]
4498; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v2, s[2:3]
4499; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
4500; GFX9-NEXT:    v_or_b32_e32 v1, v1, v3
4501; GFX9-NEXT:    ; return to shader part epilog
4502;
4503; GFX10-LABEL: v_fshr_i64_ssv:
4504; GFX10:       ; %bb.0:
4505; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
4506; GFX10-NEXT:    v_and_b32_e32 v0, 63, v0
4507; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4508; GFX10-NEXT:    v_and_b32_e32 v2, 63, v1
4509; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v0, s[2:3]
4510; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v2, s[0:1]
4511; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
4512; GFX10-NEXT:    v_or_b32_e32 v1, v3, v1
4513; GFX10-NEXT:    ; return to shader part epilog
4514  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4515  %cast = bitcast i64 %result to <2 x float>
4516  ret <2 x float> %cast
4517}
4518
; Test: llvm.fshr.i64 in an amdgpu_ps function where %lhs and %amt are inreg
; and %rhs is not (the "svs" suffix follows that lhs/rhs/amt order).
; Expected sequence for every check prefix:
;   - s[2:3] (amt) is masked with 63 into s[4:5] for the right shift, and
;     s_andn2_b64 computes 63 & ~amt into s[2:3] for the left shift;
;   - s[0:1] is shifted left by 1 and then by s2, entirely with scalar shifts;
;   - v[0:1] is shifted right by s4 with a VALU 64-bit shift;
;   - the halves are combined with v_or_b32 taking the SGPR as one operand.
; The GFX10 checks schedule the VALU shift before the scalar left shifts;
; otherwise the instructions match the other prefixes.
; The i64 result is bitcast to <2 x float> before being returned.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4519define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
4520; GFX6-LABEL: v_fshr_i64_svs:
4521; GFX6:       ; %bb.0:
4522; GFX6-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4523; GFX6-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4524; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4525; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], s4
4526; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4527; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4528; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
4529; GFX6-NEXT:    ; return to shader part epilog
4530;
4531; GFX8-LABEL: v_fshr_i64_svs:
4532; GFX8:       ; %bb.0:
4533; GFX8-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4534; GFX8-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4535; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4536; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4537; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4538; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4539; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
4540; GFX8-NEXT:    ; return to shader part epilog
4541;
4542; GFX9-LABEL: v_fshr_i64_svs:
4543; GFX9:       ; %bb.0:
4544; GFX9-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4545; GFX9-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4546; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4547; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4548; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4549; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4550; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
4551; GFX9-NEXT:    ; return to shader part epilog
4552;
4553; GFX10-LABEL: v_fshr_i64_svs:
4554; GFX10:       ; %bb.0:
4555; GFX10-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4556; GFX10-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4557; GFX10-NEXT:    v_lshrrev_b64 v[0:1], s4, v[0:1]
4558; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4559; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s2
4560; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4561; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
4562; GFX10-NEXT:    ; return to shader part epilog
4563  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4564  %cast = bitcast i64 %result to <2 x float>
4565  ret <2 x float> %cast
4566}
4567
; Test: llvm.fshr.i64 in an amdgpu_ps function where %rhs and %amt are inreg
; and %lhs is not (the "vss" suffix follows that lhs/rhs/amt order).
; Expected sequence for every check prefix:
;   - v[0:1] is shifted left by 1 with a VALU 64-bit shift;
;   - s[2:3] (amt) is masked with 63 for the right shift, and s_andn2_b64
;     computes 63 & ~amt for the left shift;
;   - v[0:1] is shifted left by the complemented amount (VALU) and s[0:1] is
;     shifted right by the masked amount (scalar s_lshr_b64);
;   - the halves are combined with v_or_b32 taking the SGPR as one operand.
; The GFX10 checks place the two masked amounts in the opposite SGPR pairs
; (s[4:5] holds the complemented amount there) and reorder the shifts.
; The i64 result is bitcast to <2 x float> before being returned.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4568define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
4569; GFX6-LABEL: v_fshr_i64_vss:
4570; GFX6:       ; %bb.0:
4571; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
4572; GFX6-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4573; GFX6-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4574; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], s2
4575; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
4576; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
4577; GFX6-NEXT:    v_or_b32_e32 v1, s1, v1
4578; GFX6-NEXT:    ; return to shader part epilog
4579;
4580; GFX8-LABEL: v_fshr_i64_vss:
4581; GFX8:       ; %bb.0:
4582; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4583; GFX8-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4584; GFX8-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4585; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s2, v[0:1]
4586; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
4587; GFX8-NEXT:    v_or_b32_e32 v0, s0, v0
4588; GFX8-NEXT:    v_or_b32_e32 v1, s1, v1
4589; GFX8-NEXT:    ; return to shader part epilog
4590;
4591; GFX9-LABEL: v_fshr_i64_vss:
4592; GFX9:       ; %bb.0:
4593; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4594; GFX9-NEXT:    s_and_b64 s[4:5], s[2:3], 63
4595; GFX9-NEXT:    s_andn2_b64 s[2:3], 63, s[2:3]
4596; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s2, v[0:1]
4597; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], s4
4598; GFX9-NEXT:    v_or_b32_e32 v0, s0, v0
4599; GFX9-NEXT:    v_or_b32_e32 v1, s1, v1
4600; GFX9-NEXT:    ; return to shader part epilog
4601;
4602; GFX10-LABEL: v_fshr_i64_vss:
4603; GFX10:       ; %bb.0:
4604; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4605; GFX10-NEXT:    s_andn2_b64 s[4:5], 63, s[2:3]
4606; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], 63
4607; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
4608; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s4, v[0:1]
4609; GFX10-NEXT:    v_or_b32_e32 v0, s0, v0
4610; GFX10-NEXT:    v_or_b32_e32 v1, s1, v1
4611; GFX10-NEXT:    ; return to shader part epilog
4612  %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
4613  %cast = bitcast i64 %result to <2 x float>
4614  ret <2 x float> %cast
4615}
4616
; Test: llvm.fshr.v2i64 in an amdgpu_ps function with all three vector
; operands inreg. Every expected instruction is a scalar (s_*) operation.
; Per 64-bit element the checks expect the same pattern:
;   - the amount is masked with 63 (s_and_b64) for the right shift, and
;     s_andn2_b64 computes 63 & ~amt for the left shift;
;   - the lhs element is shifted left by 1 and then by the complemented amount;
;   - the rhs element is shifted right by the masked amount;
;   - the two are combined with s_or_b64.
; Element 0 uses s[0:1]/s[4:5]/s[8:9] and element 1 uses s[2:3]/s[6:7]/s[10:11]
; for lhs/rhs/amt; the results end up in s[0:1] and s[2:3].
; The GFX6, GFX8 and GFX9 checks finish element 0 before starting element 1,
; whereas the GFX10 checks interleave the two elements.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4617define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
4618; GFX6-LABEL: s_fshr_v2i64:
4619; GFX6:       ; %bb.0:
4620; GFX6-NEXT:    s_and_b64 s[12:13], s[8:9], 63
4621; GFX6-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
4622; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4623; GFX6-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
4624; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
4625; GFX6-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4626; GFX6-NEXT:    s_and_b64 s[4:5], s[10:11], 63
4627; GFX6-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4628; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4629; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4630; GFX6-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
4631; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4632; GFX6-NEXT:    ; return to shader part epilog
4633;
4634; GFX8-LABEL: s_fshr_v2i64:
4635; GFX8:       ; %bb.0:
4636; GFX8-NEXT:    s_and_b64 s[12:13], s[8:9], 63
4637; GFX8-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
4638; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4639; GFX8-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
4640; GFX8-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
4641; GFX8-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4642; GFX8-NEXT:    s_and_b64 s[4:5], s[10:11], 63
4643; GFX8-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4644; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4645; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4646; GFX8-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
4647; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4648; GFX8-NEXT:    ; return to shader part epilog
4649;
4650; GFX9-LABEL: s_fshr_v2i64:
4651; GFX9:       ; %bb.0:
4652; GFX9-NEXT:    s_and_b64 s[12:13], s[8:9], 63
4653; GFX9-NEXT:    s_andn2_b64 s[8:9], 63, s[8:9]
4654; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4655; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
4656; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s12
4657; GFX9-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4658; GFX9-NEXT:    s_and_b64 s[4:5], s[10:11], 63
4659; GFX9-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4660; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4661; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4662; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], s4
4663; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4664; GFX9-NEXT:    ; return to shader part epilog
4665;
4666; GFX10-LABEL: s_fshr_v2i64:
4667; GFX10:       ; %bb.0:
4668; GFX10-NEXT:    s_andn2_b64 s[12:13], 63, s[8:9]
4669; GFX10-NEXT:    s_and_b64 s[8:9], s[8:9], 63
4670; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4671; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
4672; GFX10-NEXT:    s_andn2_b64 s[8:9], 63, s[10:11]
4673; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4674; GFX10-NEXT:    s_and_b64 s[10:11], s[10:11], 63
4675; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
4676; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
4677; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s10
4678; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
4679; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
4680; GFX10-NEXT:    ; return to shader part epilog
4681  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
4682  ret <2 x i64> %result
4683}
4684
; Test: llvm.fshr.v2i64 with the default calling convention (no inreg
; arguments). Apart from the leading s_waitcnt and the final s_setpc_b64
; return, every expected instruction is a VALU (v_*) operation.
; Per 64-bit element the checks expect:
;   - the amount's low dword is masked with 63 for the right shift, and its
;     bitwise complement (v_xor with -1) is masked with 63 for the left shift;
;   - the lhs element is shifted left by 1 and then by the complemented amount;
;   - the rhs element is shifted right by the masked amount;
;   - the two are ORed dword by dword.
; Element 0 uses v[0:1]/v[4:5]/v8 and element 1 uses v[2:3]/v[6:7]/v10 for
; lhs/rhs/amt; the results end up in v[0:1] and v[2:3].
; The GFX10 checks additionally expect s_waitcnt_vscnt at entry and group the
; two elements' operations together instead of handling them one after another.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4685define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
4686; GFX6-LABEL: v_fshr_v2i64:
4687; GFX6:       ; %bb.0:
4688; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4689; GFX6-NEXT:    v_and_b32_e32 v9, 63, v8
4690; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4691; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
4692; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
4693; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v8
4694; GFX6-NEXT:    v_lshr_b64 v[4:5], v[4:5], v9
4695; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v10
4696; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
4697; GFX6-NEXT:    v_or_b32_e32 v0, v0, v4
4698; GFX6-NEXT:    v_and_b32_e32 v4, 63, v10
4699; GFX6-NEXT:    v_and_b32_e32 v8, 63, v8
4700; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], v8
4701; GFX6-NEXT:    v_lshr_b64 v[6:7], v[6:7], v4
4702; GFX6-NEXT:    v_or_b32_e32 v1, v1, v5
4703; GFX6-NEXT:    v_or_b32_e32 v2, v2, v6
4704; GFX6-NEXT:    v_or_b32_e32 v3, v3, v7
4705; GFX6-NEXT:    s_setpc_b64 s[30:31]
4706;
4707; GFX8-LABEL: v_fshr_v2i64:
4708; GFX8:       ; %bb.0:
4709; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4710; GFX8-NEXT:    v_and_b32_e32 v9, 63, v8
4711; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v8
4712; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4713; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
4714; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
4715; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
4716; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v10
4717; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4718; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
4719; GFX8-NEXT:    v_and_b32_e32 v4, 63, v10
4720; GFX8-NEXT:    v_and_b32_e32 v8, 63, v8
4721; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
4722; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
4723; GFX8-NEXT:    v_or_b32_e32 v1, v1, v5
4724; GFX8-NEXT:    v_or_b32_e32 v2, v2, v6
4725; GFX8-NEXT:    v_or_b32_e32 v3, v3, v7
4726; GFX8-NEXT:    s_setpc_b64 s[30:31]
4727;
4728; GFX9-LABEL: v_fshr_v2i64:
4729; GFX9:       ; %bb.0:
4730; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4731; GFX9-NEXT:    v_and_b32_e32 v9, 63, v8
4732; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v8
4733; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4734; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
4735; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
4736; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v9, v[4:5]
4737; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v10
4738; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4739; GFX9-NEXT:    v_or_b32_e32 v0, v0, v4
4740; GFX9-NEXT:    v_and_b32_e32 v4, 63, v10
4741; GFX9-NEXT:    v_and_b32_e32 v8, 63, v8
4742; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v8, v[2:3]
4743; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v4, v[6:7]
4744; GFX9-NEXT:    v_or_b32_e32 v1, v1, v5
4745; GFX9-NEXT:    v_or_b32_e32 v2, v2, v6
4746; GFX9-NEXT:    v_or_b32_e32 v3, v3, v7
4747; GFX9-NEXT:    s_setpc_b64 s[30:31]
4748;
4749; GFX10-LABEL: v_fshr_v2i64:
4750; GFX10:       ; %bb.0:
4751; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4752; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
4753; GFX10-NEXT:    v_xor_b32_e32 v9, -1, v8
4754; GFX10-NEXT:    v_xor_b32_e32 v11, -1, v10
4755; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
4756; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
4757; GFX10-NEXT:    v_and_b32_e32 v8, 63, v8
4758; GFX10-NEXT:    v_and_b32_e32 v9, 63, v9
4759; GFX10-NEXT:    v_and_b32_e32 v11, 63, v11
4760; GFX10-NEXT:    v_and_b32_e32 v10, 63, v10
4761; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v8, v[4:5]
4762; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v9, v[0:1]
4763; GFX10-NEXT:    v_lshlrev_b64 v[2:3], v11, v[2:3]
4764; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v10, v[6:7]
4765; GFX10-NEXT:    v_or_b32_e32 v0, v0, v4
4766; GFX10-NEXT:    v_or_b32_e32 v1, v1, v5
4767; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
4768; GFX10-NEXT:    v_or_b32_e32 v3, v3, v7
4769; GFX10-NEXT:    s_setpc_b64 s[30:31]
4770  %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
4771  ret <2 x i64> %result
4772}
4773
; Test: llvm.fshr.i128 in an amdgpu_ps function with all three operands
; inreg. Every expected instruction is a scalar (s_*) operation.
; Expected shape of the lowering, common to all four check prefixes:
;   - a 64-bit mask 0x7f is built in s[10:11]; s_and_b64 gives amt & 0x7f
;     (right-shift amount) and s_andn2_b64 gives 0x7f & ~amt (left-shift
;     amount);
;   - lhs is shifted left by 1 across 128 bits: both 64-bit halves get
;     s_lshl_b64 by 1, and "s_lshr_b32 s10, s1, 31" extracts bit 31 of s1
;     (the top bit of the low half) which is ORed into the high half;
;   - the 128-bit variable left shift of that value and the 128-bit variable
;     right shift of rhs are each expanded into 64-bit shifts by amt,
;     64 - amt and amt - 64, with s_cmp_lt_u32 against 64 and s_cmp_eq_u32
;     against 0 feeding s_cselect to pick the right pieces;
;   - two final s_or_b64 instructions combine the halves into s[0:1] and
;     s[2:3].
; The GFX6, GFX8 and GFX9 expectations are instruction-for-instruction the
; same; the GFX10 expectation differs in scheduling and register assignment.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
4774define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
4775; GFX6-LABEL: s_fshr_i128:
4776; GFX6:       ; %bb.0:
4777; GFX6-NEXT:    s_movk_i32 s10, 0x7f
4778; GFX6-NEXT:    s_mov_b32 s11, 0
4779; GFX6-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4780; GFX6-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4781; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4782; GFX6-NEXT:    s_lshr_b32 s10, s1, 31
4783; GFX6-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
4784; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
4785; GFX6-NEXT:    s_sub_i32 s13, s8, 64
4786; GFX6-NEXT:    s_sub_i32 s9, 64, s8
4787; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
4788; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
4789; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
4790; GFX6-NEXT:    s_cselect_b32 s17, 1, 0
4791; GFX6-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
4792; GFX6-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
4793; GFX6-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4794; GFX6-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
4795; GFX6-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
4796; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
4797; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
4798; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
4799; GFX6-NEXT:    s_cmp_lg_u32 s17, 0
4800; GFX6-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
4801; GFX6-NEXT:    s_sub_i32 s14, s12, 64
4802; GFX6-NEXT:    s_sub_i32 s13, 64, s12
4803; GFX6-NEXT:    s_cmp_lt_u32 s12, 64
4804; GFX6-NEXT:    s_cselect_b32 s15, 1, 0
4805; GFX6-NEXT:    s_cmp_eq_u32 s12, 0
4806; GFX6-NEXT:    s_cselect_b32 s16, 1, 0
4807; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
4808; GFX6-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
4809; GFX6-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
4810; GFX6-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
4811; GFX6-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4812; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
4813; GFX6-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
4814; GFX6-NEXT:    s_cmp_lg_u32 s16, 0
4815; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
4816; GFX6-NEXT:    s_cmp_lg_u32 s15, 0
4817; GFX6-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
4818; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
4819; GFX6-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
4820; GFX6-NEXT:    ; return to shader part epilog
4821;
4822; GFX8-LABEL: s_fshr_i128:
4823; GFX8:       ; %bb.0:
4824; GFX8-NEXT:    s_movk_i32 s10, 0x7f
4825; GFX8-NEXT:    s_mov_b32 s11, 0
4826; GFX8-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4827; GFX8-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4828; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4829; GFX8-NEXT:    s_lshr_b32 s10, s1, 31
4830; GFX8-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
4831; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
4832; GFX8-NEXT:    s_sub_i32 s13, s8, 64
4833; GFX8-NEXT:    s_sub_i32 s9, 64, s8
4834; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
4835; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
4836; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
4837; GFX8-NEXT:    s_cselect_b32 s17, 1, 0
4838; GFX8-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
4839; GFX8-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
4840; GFX8-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4841; GFX8-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
4842; GFX8-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
4843; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
4844; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
4845; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
4846; GFX8-NEXT:    s_cmp_lg_u32 s17, 0
4847; GFX8-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
4848; GFX8-NEXT:    s_sub_i32 s14, s12, 64
4849; GFX8-NEXT:    s_sub_i32 s13, 64, s12
4850; GFX8-NEXT:    s_cmp_lt_u32 s12, 64
4851; GFX8-NEXT:    s_cselect_b32 s15, 1, 0
4852; GFX8-NEXT:    s_cmp_eq_u32 s12, 0
4853; GFX8-NEXT:    s_cselect_b32 s16, 1, 0
4854; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
4855; GFX8-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
4856; GFX8-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
4857; GFX8-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
4858; GFX8-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4859; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
4860; GFX8-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
4861; GFX8-NEXT:    s_cmp_lg_u32 s16, 0
4862; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
4863; GFX8-NEXT:    s_cmp_lg_u32 s15, 0
4864; GFX8-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
4865; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
4866; GFX8-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
4867; GFX8-NEXT:    ; return to shader part epilog
4868;
4869; GFX9-LABEL: s_fshr_i128:
4870; GFX9:       ; %bb.0:
4871; GFX9-NEXT:    s_movk_i32 s10, 0x7f
4872; GFX9-NEXT:    s_mov_b32 s11, 0
4873; GFX9-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4874; GFX9-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4875; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4876; GFX9-NEXT:    s_lshr_b32 s10, s1, 31
4877; GFX9-NEXT:    s_lshl_b64 s[14:15], s[0:1], 1
4878; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[10:11]
4879; GFX9-NEXT:    s_sub_i32 s13, s8, 64
4880; GFX9-NEXT:    s_sub_i32 s9, 64, s8
4881; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
4882; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
4883; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
4884; GFX9-NEXT:    s_cselect_b32 s17, 1, 0
4885; GFX9-NEXT:    s_lshl_b64 s[2:3], s[14:15], s8
4886; GFX9-NEXT:    s_lshr_b64 s[10:11], s[14:15], s9
4887; GFX9-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4888; GFX9-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
4889; GFX9-NEXT:    s_lshl_b64 s[10:11], s[14:15], s13
4890; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
4891; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
4892; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
4893; GFX9-NEXT:    s_cmp_lg_u32 s17, 0
4894; GFX9-NEXT:    s_cselect_b64 s[8:9], s[0:1], s[8:9]
4895; GFX9-NEXT:    s_sub_i32 s14, s12, 64
4896; GFX9-NEXT:    s_sub_i32 s13, 64, s12
4897; GFX9-NEXT:    s_cmp_lt_u32 s12, 64
4898; GFX9-NEXT:    s_cselect_b32 s15, 1, 0
4899; GFX9-NEXT:    s_cmp_eq_u32 s12, 0
4900; GFX9-NEXT:    s_cselect_b32 s16, 1, 0
4901; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], s12
4902; GFX9-NEXT:    s_lshr_b64 s[10:11], s[4:5], s12
4903; GFX9-NEXT:    s_lshl_b64 s[12:13], s[6:7], s13
4904; GFX9-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
4905; GFX9-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4906; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
4907; GFX9-NEXT:    s_cselect_b64 s[6:7], s[10:11], s[6:7]
4908; GFX9-NEXT:    s_cmp_lg_u32 s16, 0
4909; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
4910; GFX9-NEXT:    s_cmp_lg_u32 s15, 0
4911; GFX9-NEXT:    s_cselect_b64 s[6:7], s[0:1], 0
4912; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[4:5]
4913; GFX9-NEXT:    s_or_b64 s[2:3], s[8:9], s[6:7]
4914; GFX9-NEXT:    ; return to shader part epilog
4915;
4916; GFX10-LABEL: s_fshr_i128:
4917; GFX10:       ; %bb.0:
4918; GFX10-NEXT:    s_movk_i32 s10, 0x7f
4919; GFX10-NEXT:    s_mov_b32 s11, 0
4920; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
4921; GFX10-NEXT:    s_and_b64 s[12:13], s[8:9], s[10:11]
4922; GFX10-NEXT:    s_andn2_b64 s[8:9], s[10:11], s[8:9]
4923; GFX10-NEXT:    s_lshr_b32 s10, s1, 31
4924; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
4925; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[10:11]
4926; GFX10-NEXT:    s_sub_i32 s13, s8, 64
4927; GFX10-NEXT:    s_sub_i32 s9, 64, s8
4928; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
4929; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
4930; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
4931; GFX10-NEXT:    s_cselect_b32 s17, 1, 0
4932; GFX10-NEXT:    s_lshr_b64 s[10:11], s[0:1], s9
4933; GFX10-NEXT:    s_lshl_b64 s[14:15], s[2:3], s8
4934; GFX10-NEXT:    s_lshl_b64 s[8:9], s[0:1], s8
4935; GFX10-NEXT:    s_or_b64 s[10:11], s[10:11], s[14:15]
4936; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s13
4937; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
4938; GFX10-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
4939; GFX10-NEXT:    s_cselect_b64 s[0:1], s[10:11], s[0:1]
4940; GFX10-NEXT:    s_cmp_lg_u32 s17, 0
4941; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
4942; GFX10-NEXT:    s_sub_i32 s14, s12, 64
4943; GFX10-NEXT:    s_sub_i32 s10, 64, s12
4944; GFX10-NEXT:    s_cmp_lt_u32 s12, 64
4945; GFX10-NEXT:    s_cselect_b32 s15, 1, 0
4946; GFX10-NEXT:    s_cmp_eq_u32 s12, 0
4947; GFX10-NEXT:    s_cselect_b32 s16, 1, 0
4948; GFX10-NEXT:    s_lshr_b64 s[0:1], s[4:5], s12
4949; GFX10-NEXT:    s_lshl_b64 s[10:11], s[6:7], s10
4950; GFX10-NEXT:    s_lshr_b64 s[12:13], s[6:7], s12
4951; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[10:11]
4952; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], s14
4953; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
4954; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[6:7]
4955; GFX10-NEXT:    s_cmp_lg_u32 s16, 0
4956; GFX10-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
4957; GFX10-NEXT:    s_cmp_lg_u32 s15, 0
4958; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], 0
4959; GFX10-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
4960; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
4961; GFX10-NEXT:    ; return to shader part epilog
4962  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
4963  ret i128 %result
4964}
4965
4966define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
4967; GFX6-LABEL: v_fshr_i128:
4968; GFX6:       ; %bb.0:
4969; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4970; GFX6-NEXT:    s_movk_i32 s4, 0x7f
4971; GFX6-NEXT:    v_and_b32_e32 v14, s4, v8
4972; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v8
4973; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
4974; GFX6-NEXT:    v_and_b32_e32 v15, s4, v8
4975; GFX6-NEXT:    v_lshl_b64 v[8:9], v[0:1], 1
4976; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
4977; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
4978; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v15
4979; GFX6-NEXT:    v_lshr_b64 v[0:1], v[8:9], v0
4980; GFX6-NEXT:    v_lshl_b64 v[10:11], v[2:3], v15
4981; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v15
4982; GFX6-NEXT:    v_lshl_b64 v[12:13], v[8:9], v15
4983; GFX6-NEXT:    v_or_b32_e32 v10, v0, v10
4984; GFX6-NEXT:    v_or_b32_e32 v11, v1, v11
4985; GFX6-NEXT:    v_lshl_b64 v[0:1], v[8:9], v16
4986; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
4987; GFX6-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
4988; GFX6-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
4989; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
4990; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
4991; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
4992; GFX6-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
4993; GFX6-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
4994; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v14
4995; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], v14
4996; GFX6-NEXT:    v_lshl_b64 v[2:3], v[6:7], v2
4997; GFX6-NEXT:    v_subrev_i32_e32 v15, vcc, 64, v14
4998; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
4999; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
5000; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], v15
5001; GFX6-NEXT:    v_lshr_b64 v[8:9], v[6:7], v14
5002; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
5003; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5004; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5005; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
5006; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
5007; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
5008; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5009; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5010; GFX6-NEXT:    v_or_b32_e32 v0, v12, v0
5011; GFX6-NEXT:    v_or_b32_e32 v1, v13, v1
5012; GFX6-NEXT:    v_or_b32_e32 v2, v10, v2
5013; GFX6-NEXT:    v_or_b32_e32 v3, v11, v3
5014; GFX6-NEXT:    s_setpc_b64 s[30:31]
5015;
5016; GFX8-LABEL: v_fshr_i128:
5017; GFX8:       ; %bb.0:
5018; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5019; GFX8-NEXT:    s_movk_i32 s4, 0x7f
5020; GFX8-NEXT:    v_and_b32_e32 v14, s4, v8
5021; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v8
5022; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5023; GFX8-NEXT:    v_and_b32_e32 v15, s4, v8
5024; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
5025; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5026; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
5027; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v15
5028; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
5029; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
5030; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v15
5031; GFX8-NEXT:    v_lshlrev_b64 v[12:13], v15, v[8:9]
5032; GFX8-NEXT:    v_or_b32_e32 v10, v0, v10
5033; GFX8-NEXT:    v_or_b32_e32 v11, v1, v11
5034; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v16, v[8:9]
5035; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
5036; GFX8-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
5037; GFX8-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
5038; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
5039; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
5040; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
5041; GFX8-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
5042; GFX8-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
5043; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v14
5044; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
5045; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
5046; GFX8-NEXT:    v_subrev_u32_e32 v15, vcc, 64, v14
5047; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
5048; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
5049; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
5050; GFX8-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
5051; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
5052; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5053; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5054; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
5055; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
5056; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
5057; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5058; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5059; GFX8-NEXT:    v_or_b32_e32 v0, v12, v0
5060; GFX8-NEXT:    v_or_b32_e32 v1, v13, v1
5061; GFX8-NEXT:    v_or_b32_e32 v2, v10, v2
5062; GFX8-NEXT:    v_or_b32_e32 v3, v11, v3
5063; GFX8-NEXT:    s_setpc_b64 s[30:31]
5064;
5065; GFX9-LABEL: v_fshr_i128:
5066; GFX9:       ; %bb.0:
5067; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5068; GFX9-NEXT:    s_movk_i32 s4, 0x7f
5069; GFX9-NEXT:    v_and_b32_e32 v14, s4, v8
5070; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v8
5071; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5072; GFX9-NEXT:    v_and_b32_e32 v15, s4, v8
5073; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[0:1]
5074; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5075; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
5076; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v15
5077; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, v[8:9]
5078; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v15, v[2:3]
5079; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v15
5080; GFX9-NEXT:    v_lshlrev_b64 v[12:13], v15, v[8:9]
5081; GFX9-NEXT:    v_or_b32_e32 v10, v0, v10
5082; GFX9-NEXT:    v_or_b32_e32 v11, v1, v11
5083; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v16, v[8:9]
5084; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v15
5085; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v12, vcc
5086; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v13, vcc
5087; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
5088; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
5089; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
5090; GFX9-NEXT:    v_cndmask_b32_e32 v10, v0, v2, vcc
5091; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v14
5092; GFX9-NEXT:    v_cndmask_b32_e32 v11, v1, v3, vcc
5093; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v14, v[4:5]
5094; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, v[6:7]
5095; GFX9-NEXT:    v_subrev_u32_e32 v15, 64, v14
5096; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
5097; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
5098; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v15, v[6:7]
5099; GFX9-NEXT:    v_lshrrev_b64 v[8:9], v14, v[6:7]
5100; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
5101; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5102; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5103; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
5104; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s[4:5]
5105; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s[4:5]
5106; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5107; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5108; GFX9-NEXT:    v_or_b32_e32 v0, v12, v0
5109; GFX9-NEXT:    v_or_b32_e32 v1, v13, v1
5110; GFX9-NEXT:    v_or_b32_e32 v2, v10, v2
5111; GFX9-NEXT:    v_or_b32_e32 v3, v11, v3
5112; GFX9-NEXT:    s_setpc_b64 s[30:31]
5113;
5114; GFX10-LABEL: v_fshr_i128:
5115; GFX10:       ; %bb.0:
5116; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5117; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5118; GFX10-NEXT:    v_xor_b32_e32 v9, -1, v8
5119; GFX10-NEXT:    s_movk_i32 s4, 0x7f
5120; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5121; GFX10-NEXT:    v_lshrrev_b32_e32 v10, 31, v1
5122; GFX10-NEXT:    v_and_b32_e32 v19, s4, v8
5123; GFX10-NEXT:    v_and_b32_e32 v18, s4, v9
5124; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5125; GFX10-NEXT:    v_or_b32_e32 v2, v2, v10
5126; GFX10-NEXT:    v_sub_nc_u32_e32 v16, 64, v19
5127; GFX10-NEXT:    v_sub_nc_u32_e32 v10, 64, v18
5128; GFX10-NEXT:    v_subrev_nc_u32_e32 v21, 64, v18
5129; GFX10-NEXT:    v_subrev_nc_u32_e32 v20, 64, v19
5130; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v18, v[2:3]
5131; GFX10-NEXT:    v_lshrrev_b64 v[12:13], v19, v[4:5]
5132; GFX10-NEXT:    v_lshrrev_b64 v[10:11], v10, v[0:1]
5133; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v16, v[6:7]
5134; GFX10-NEXT:    v_lshlrev_b64 v[14:15], v18, v[0:1]
5135; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v21, v[0:1]
5136; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v18
5137; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v19
5138; GFX10-NEXT:    v_or_b32_e32 v10, v10, v8
5139; GFX10-NEXT:    v_or_b32_e32 v11, v11, v9
5140; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v20, v[6:7]
5141; GFX10-NEXT:    v_or_b32_e32 v12, v12, v16
5142; GFX10-NEXT:    v_or_b32_e32 v13, v13, v17
5143; GFX10-NEXT:    v_cndmask_b32_e32 v10, v0, v10, vcc_lo
5144; GFX10-NEXT:    v_cndmask_b32_e32 v11, v1, v11, vcc_lo
5145; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v19, v[6:7]
5146; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v12, s4
5147; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v19
5148; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v18
5149; GFX10-NEXT:    v_cndmask_b32_e64 v6, v9, v13, s4
5150; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v14, vcc_lo
5151; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v15, vcc_lo
5152; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s5
5153; GFX10-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s6
5154; GFX10-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s6
5155; GFX10-NEXT:    v_cndmask_b32_e64 v5, v6, v5, s5
5156; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, v0, s4
5157; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, v1, s4
5158; GFX10-NEXT:    v_or_b32_e32 v0, v14, v4
5159; GFX10-NEXT:    v_or_b32_e32 v1, v7, v5
5160; GFX10-NEXT:    v_or_b32_e32 v2, v2, v6
5161; GFX10-NEXT:    v_or_b32_e32 v3, v3, v8
5162; GFX10-NEXT:    s_setpc_b64 s[30:31]
5163  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5164  ret i128 %result
5165}
5166
; Funnel-shift-right of i128 in an amdgpu_ps function where %lhs and %rhs are
; marked inreg and %amt is a plain argument.
; In the expected code below, %lhs is read from s[0:3], %rhs from s[4:7], and
; only the low dword of %amt (v0) is used, after masking with 0x7f.
; The i128 result is bitcast to <4 x float>; each expected sequence finishes by
; writing the four result dwords to v0-v3.
; NOTE(review): the "ssv" suffix presumably names the operand placement
; (scalar, scalar, vector) -- confirm against the sibling tests in this file.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of the file).
5167define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
5168; GFX6-LABEL: v_fshr_i128_ssv:
5169; GFX6:       ; %bb.0:
5170; GFX6-NEXT:    s_movk_i32 s8, 0x7f
5171; GFX6-NEXT:    v_and_b32_e32 v6, s8, v0
5172; GFX6-NEXT:    v_xor_b32_e32 v0, -1, v0
5173; GFX6-NEXT:    s_mov_b32 s9, 0
5174; GFX6-NEXT:    v_and_b32_e32 v7, s8, v0
5175; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5176; GFX6-NEXT:    s_lshr_b32 s8, s1, 31
5177; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5178; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5179; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, 64, v7
5180; GFX6-NEXT:    v_lshr_b64 v[0:1], s[10:11], v0
5181; GFX6-NEXT:    v_lshl_b64 v[2:3], s[0:1], v7
5182; GFX6-NEXT:    v_subrev_i32_e32 v8, vcc, 64, v7
5183; GFX6-NEXT:    v_lshl_b64 v[4:5], s[10:11], v7
5184; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
5185; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
5186; GFX6-NEXT:    v_lshl_b64 v[0:1], s[10:11], v8
5187; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
5188; GFX6-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
5189; GFX6-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
5190; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5191; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5192; GFX6-NEXT:    v_mov_b32_e32 v2, s0
5193; GFX6-NEXT:    v_mov_b32_e32 v3, s1
5194; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
5195; GFX6-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
5196; GFX6-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
5197; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 64, v6
5198; GFX6-NEXT:    v_lshr_b64 v[0:1], s[4:5], v6
5199; GFX6-NEXT:    v_lshl_b64 v[2:3], s[6:7], v2
5200; GFX6-NEXT:    v_subrev_i32_e32 v11, vcc, 64, v6
5201; GFX6-NEXT:    v_or_b32_e32 v2, v0, v2
5202; GFX6-NEXT:    v_or_b32_e32 v3, v1, v3
5203; GFX6-NEXT:    v_lshr_b64 v[0:1], s[6:7], v11
5204; GFX6-NEXT:    v_lshr_b64 v[4:5], s[6:7], v6
5205; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
5206; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5207; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5208; GFX6-NEXT:    v_mov_b32_e32 v2, s4
5209; GFX6-NEXT:    v_mov_b32_e32 v3, s5
5210; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
5211; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5212; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5213; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
5214; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
5215; GFX6-NEXT:    v_or_b32_e32 v0, v8, v0
5216; GFX6-NEXT:    v_or_b32_e32 v1, v9, v1
5217; GFX6-NEXT:    v_or_b32_e32 v2, v7, v2
5218; GFX6-NEXT:    v_or_b32_e32 v3, v10, v3
5219; GFX6-NEXT:    ; return to shader part epilog
5220;
5221; GFX8-LABEL: v_fshr_i128_ssv:
5222; GFX8:       ; %bb.0:
5223; GFX8-NEXT:    s_movk_i32 s8, 0x7f
5224; GFX8-NEXT:    v_and_b32_e32 v6, s8, v0
5225; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
5226; GFX8-NEXT:    s_mov_b32 s9, 0
5227; GFX8-NEXT:    v_and_b32_e32 v7, s8, v0
5228; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5229; GFX8-NEXT:    s_lshr_b32 s8, s1, 31
5230; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5231; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5232; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 64, v7
5233; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v0, s[10:11]
5234; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v7, s[0:1]
5235; GFX8-NEXT:    v_subrev_u32_e32 v8, vcc, 64, v7
5236; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v7, s[10:11]
5237; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
5238; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
5239; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v8, s[10:11]
5240; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
5241; GFX8-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
5242; GFX8-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
5243; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5244; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5245; GFX8-NEXT:    v_mov_b32_e32 v2, s0
5246; GFX8-NEXT:    v_mov_b32_e32 v3, s1
5247; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
5248; GFX8-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
5249; GFX8-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
5250; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, 64, v6
5251; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v6, s[4:5]
5252; GFX8-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
5253; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, 64, v6
5254; GFX8-NEXT:    v_or_b32_e32 v2, v0, v2
5255; GFX8-NEXT:    v_or_b32_e32 v3, v1, v3
5256; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
5257; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v6, s[6:7]
5258; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
5259; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5260; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5261; GFX8-NEXT:    v_mov_b32_e32 v2, s4
5262; GFX8-NEXT:    v_mov_b32_e32 v3, s5
5263; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
5264; GFX8-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5265; GFX8-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5266; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
5267; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
5268; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
5269; GFX8-NEXT:    v_or_b32_e32 v1, v9, v1
5270; GFX8-NEXT:    v_or_b32_e32 v2, v7, v2
5271; GFX8-NEXT:    v_or_b32_e32 v3, v10, v3
5272; GFX8-NEXT:    ; return to shader part epilog
5273;
5274; GFX9-LABEL: v_fshr_i128_ssv:
5275; GFX9:       ; %bb.0:
5276; GFX9-NEXT:    s_movk_i32 s8, 0x7f
5277; GFX9-NEXT:    v_and_b32_e32 v6, s8, v0
5278; GFX9-NEXT:    v_xor_b32_e32 v0, -1, v0
5279; GFX9-NEXT:    s_mov_b32 s9, 0
5280; GFX9-NEXT:    v_and_b32_e32 v7, s8, v0
5281; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5282; GFX9-NEXT:    s_lshr_b32 s8, s1, 31
5283; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5284; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5285; GFX9-NEXT:    v_sub_u32_e32 v0, 64, v7
5286; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v0, s[10:11]
5287; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v7, s[0:1]
5288; GFX9-NEXT:    v_subrev_u32_e32 v8, 64, v7
5289; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v7, s[10:11]
5290; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
5291; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
5292; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v8, s[10:11]
5293; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v7
5294; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
5295; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v5, vcc
5296; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5297; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5298; GFX9-NEXT:    v_mov_b32_e32 v2, s0
5299; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
5300; GFX9-NEXT:    v_mov_b32_e32 v3, s1
5301; GFX9-NEXT:    v_cndmask_b32_e32 v7, v0, v2, vcc
5302; GFX9-NEXT:    v_sub_u32_e32 v2, 64, v6
5303; GFX9-NEXT:    v_cndmask_b32_e32 v10, v1, v3, vcc
5304; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v6, s[4:5]
5305; GFX9-NEXT:    v_lshlrev_b64 v[2:3], v2, s[6:7]
5306; GFX9-NEXT:    v_subrev_u32_e32 v11, 64, v6
5307; GFX9-NEXT:    v_or_b32_e32 v2, v0, v2
5308; GFX9-NEXT:    v_or_b32_e32 v3, v1, v3
5309; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v11, s[6:7]
5310; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v6, s[6:7]
5311; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v6
5312; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
5313; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
5314; GFX9-NEXT:    v_mov_b32_e32 v2, s4
5315; GFX9-NEXT:    v_mov_b32_e32 v3, s5
5316; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v6
5317; GFX9-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5318; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5319; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v4, vcc
5320; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
5321; GFX9-NEXT:    v_or_b32_e32 v0, v8, v0
5322; GFX9-NEXT:    v_or_b32_e32 v1, v9, v1
5323; GFX9-NEXT:    v_or_b32_e32 v2, v7, v2
5324; GFX9-NEXT:    v_or_b32_e32 v3, v10, v3
5325; GFX9-NEXT:    ; return to shader part epilog
5326;
5327; GFX10-LABEL: v_fshr_i128_ssv:
5328; GFX10:       ; %bb.0:
5329; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v0
5330; GFX10-NEXT:    s_movk_i32 s10, 0x7f
5331; GFX10-NEXT:    s_mov_b32 s9, 0
5332; GFX10-NEXT:    v_and_b32_e32 v13, s10, v0
5333; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5334; GFX10-NEXT:    v_and_b32_e32 v12, s10, v1
5335; GFX10-NEXT:    s_lshr_b32 s8, s1, 31
5336; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5337; GFX10-NEXT:    v_sub_nc_u32_e32 v8, 64, v13
5338; GFX10-NEXT:    s_or_b64 s[8:9], s[2:3], s[8:9]
5339; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v12
5340; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v12, s[8:9]
5341; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, 64, v12
5342; GFX10-NEXT:    v_subrev_nc_u32_e32 v14, 64, v13
5343; GFX10-NEXT:    v_lshrrev_b64 v[4:5], v13, s[4:5]
5344; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v2, s[0:1]
5345; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v8, s[6:7]
5346; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v10, s[0:1]
5347; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v12
5348; GFX10-NEXT:    v_lshlrev_b64 v[6:7], v12, s[0:1]
5349; GFX10-NEXT:    v_cmp_gt_u32_e64 s0, 64, v13
5350; GFX10-NEXT:    v_or_b32_e32 v2, v2, v0
5351; GFX10-NEXT:    v_or_b32_e32 v3, v3, v1
5352; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v14, s[6:7]
5353; GFX10-NEXT:    v_or_b32_e32 v4, v4, v8
5354; GFX10-NEXT:    v_or_b32_e32 v5, v5, v9
5355; GFX10-NEXT:    v_cndmask_b32_e32 v8, v10, v2, vcc_lo
5356; GFX10-NEXT:    v_cndmask_b32_e32 v10, v11, v3, vcc_lo
5357; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v13, s[6:7]
5358; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
5359; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, 0, v13
5360; GFX10-NEXT:    v_cmp_eq_u32_e64 s2, 0, v12
5361; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s0
5362; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v6, vcc_lo
5363; GFX10-NEXT:    v_cndmask_b32_e32 v4, 0, v7, vcc_lo
5364; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s4, s1
5365; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, s8, s2
5366; GFX10-NEXT:    v_cndmask_b32_e64 v7, v10, s9, s2
5367; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, s5, s1
5368; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
5369; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
5370; GFX10-NEXT:    v_or_b32_e32 v0, v6, v0
5371; GFX10-NEXT:    v_or_b32_e32 v1, v4, v1
5372; GFX10-NEXT:    v_or_b32_e32 v2, v5, v2
5373; GFX10-NEXT:    v_or_b32_e32 v3, v7, v3
5374; GFX10-NEXT:    ; return to shader part epilog
5375  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5376  %cast.result = bitcast i128 %result to <4 x float>
5377  ret <4 x float> %cast.result
5378}
5379
; Funnel-shift-right of i128 in an amdgpu_ps function where %lhs and %amt are
; marked inreg and %rhs is a plain argument.
; In the expected code below, %lhs is read from s[0:3], %rhs from v[0:3], and
; the shift amount comes from s[4:5] masked with 0x7f (only the low dword of
; the masked value is used afterwards).
; The i128 result is bitcast to <4 x float>; each expected sequence finishes by
; writing the four result dwords to v0-v3.
; NOTE(review): the "svs" suffix presumably names the operand placement
; (scalar, vector, scalar) -- confirm against the sibling tests in this file.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of the file).
5380define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
5381; GFX6-LABEL: v_fshr_i128_svs:
5382; GFX6:       ; %bb.0:
5383; GFX6-NEXT:    s_movk_i32 s6, 0x7f
5384; GFX6-NEXT:    s_mov_b32 s7, 0
5385; GFX6-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5386; GFX6-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5387; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5388; GFX6-NEXT:    s_lshr_b32 s6, s1, 31
5389; GFX6-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5390; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
5391; GFX6-NEXT:    s_sub_i32 s9, s4, 64
5392; GFX6-NEXT:    s_sub_i32 s5, 64, s4
5393; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
5394; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
5395; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
5396; GFX6-NEXT:    s_cselect_b32 s13, 1, 0
5397; GFX6-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
5398; GFX6-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
5399; GFX6-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5400; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
5401; GFX6-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
5402; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
5403; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5404; GFX6-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5405; GFX6-NEXT:    s_cmp_lg_u32 s13, 0
5406; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5407; GFX6-NEXT:    s_sub_i32 s4, s8, 64
5408; GFX6-NEXT:    s_sub_i32 s5, 64, s8
5409; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
5410; GFX6-NEXT:    s_cselect_b32 s6, 1, 0
5411; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
5412; GFX6-NEXT:    v_lshr_b64 v[4:5], v[0:1], s8
5413; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s5
5414; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
5415; GFX6-NEXT:    v_lshr_b64 v[8:9], v[2:3], s8
5416; GFX6-NEXT:    v_lshr_b64 v[2:3], v[2:3], s4
5417; GFX6-NEXT:    s_and_b32 s4, 1, s6
5418; GFX6-NEXT:    v_or_b32_e32 v4, v4, v6
5419; GFX6-NEXT:    v_or_b32_e32 v5, v5, v7
5420; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5421; GFX6-NEXT:    s_and_b32 s4, 1, s7
5422; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
5423; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
5424; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5425; GFX6-NEXT:    s_and_b32 s4, 1, s6
5426; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
5427; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
5428; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5429; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5430; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5431; GFX6-NEXT:    v_or_b32_e32 v0, s2, v0
5432; GFX6-NEXT:    v_or_b32_e32 v1, s3, v1
5433; GFX6-NEXT:    v_or_b32_e32 v2, s0, v2
5434; GFX6-NEXT:    v_or_b32_e32 v3, s1, v3
5435; GFX6-NEXT:    ; return to shader part epilog
5436;
5437; GFX8-LABEL: v_fshr_i128_svs:
5438; GFX8:       ; %bb.0:
5439; GFX8-NEXT:    s_movk_i32 s6, 0x7f
5440; GFX8-NEXT:    s_mov_b32 s7, 0
5441; GFX8-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5442; GFX8-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5443; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5444; GFX8-NEXT:    s_lshr_b32 s6, s1, 31
5445; GFX8-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5446; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
5447; GFX8-NEXT:    s_sub_i32 s9, s4, 64
5448; GFX8-NEXT:    s_sub_i32 s5, 64, s4
5449; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
5450; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
5451; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
5452; GFX8-NEXT:    s_cselect_b32 s13, 1, 0
5453; GFX8-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
5454; GFX8-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
5455; GFX8-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5456; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
5457; GFX8-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
5458; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
5459; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5460; GFX8-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5461; GFX8-NEXT:    s_cmp_lg_u32 s13, 0
5462; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5463; GFX8-NEXT:    s_sub_i32 s4, s8, 64
5464; GFX8-NEXT:    s_sub_i32 s5, 64, s8
5465; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
5466; GFX8-NEXT:    s_cselect_b32 s6, 1, 0
5467; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
5468; GFX8-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
5469; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
5470; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
5471; GFX8-NEXT:    v_lshrrev_b64 v[8:9], s8, v[2:3]
5472; GFX8-NEXT:    v_lshrrev_b64 v[2:3], s4, v[2:3]
5473; GFX8-NEXT:    s_and_b32 s4, 1, s6
5474; GFX8-NEXT:    v_or_b32_e32 v4, v4, v6
5475; GFX8-NEXT:    v_or_b32_e32 v5, v5, v7
5476; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5477; GFX8-NEXT:    s_and_b32 s4, 1, s7
5478; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
5479; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
5480; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5481; GFX8-NEXT:    s_and_b32 s4, 1, s6
5482; GFX8-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
5483; GFX8-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
5484; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5485; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5486; GFX8-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5487; GFX8-NEXT:    v_or_b32_e32 v0, s2, v0
5488; GFX8-NEXT:    v_or_b32_e32 v1, s3, v1
5489; GFX8-NEXT:    v_or_b32_e32 v2, s0, v2
5490; GFX8-NEXT:    v_or_b32_e32 v3, s1, v3
5491; GFX8-NEXT:    ; return to shader part epilog
5492;
5493; GFX9-LABEL: v_fshr_i128_svs:
5494; GFX9:       ; %bb.0:
5495; GFX9-NEXT:    s_movk_i32 s6, 0x7f
5496; GFX9-NEXT:    s_mov_b32 s7, 0
5497; GFX9-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5498; GFX9-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5499; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5500; GFX9-NEXT:    s_lshr_b32 s6, s1, 31
5501; GFX9-NEXT:    s_lshl_b64 s[10:11], s[0:1], 1
5502; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[6:7]
5503; GFX9-NEXT:    s_sub_i32 s9, s4, 64
5504; GFX9-NEXT:    s_sub_i32 s5, 64, s4
5505; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
5506; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
5507; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
5508; GFX9-NEXT:    s_cselect_b32 s13, 1, 0
5509; GFX9-NEXT:    s_lshl_b64 s[2:3], s[10:11], s4
5510; GFX9-NEXT:    s_lshr_b64 s[6:7], s[10:11], s5
5511; GFX9-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5512; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[4:5]
5513; GFX9-NEXT:    s_lshl_b64 s[6:7], s[10:11], s9
5514; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
5515; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5516; GFX9-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
5517; GFX9-NEXT:    s_cmp_lg_u32 s13, 0
5518; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
5519; GFX9-NEXT:    s_sub_i32 s4, s8, 64
5520; GFX9-NEXT:    s_sub_i32 s5, 64, s8
5521; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
5522; GFX9-NEXT:    s_cselect_b32 s6, 1, 0
5523; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
5524; GFX9-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
5525; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s5, v[2:3]
5526; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
5527; GFX9-NEXT:    v_lshrrev_b64 v[8:9], s8, v[2:3]
5528; GFX9-NEXT:    v_lshrrev_b64 v[2:3], s4, v[2:3]
5529; GFX9-NEXT:    s_and_b32 s4, 1, s6
5530; GFX9-NEXT:    v_or_b32_e32 v4, v4, v6
5531; GFX9-NEXT:    v_or_b32_e32 v5, v5, v7
5532; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5533; GFX9-NEXT:    s_and_b32 s4, 1, s7
5534; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
5535; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
5536; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5537; GFX9-NEXT:    s_and_b32 s4, 1, s6
5538; GFX9-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
5539; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
5540; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5541; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v8, vcc
5542; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v9, vcc
5543; GFX9-NEXT:    v_or_b32_e32 v0, s2, v0
5544; GFX9-NEXT:    v_or_b32_e32 v1, s3, v1
5545; GFX9-NEXT:    v_or_b32_e32 v2, s0, v2
5546; GFX9-NEXT:    v_or_b32_e32 v3, s1, v3
5547; GFX9-NEXT:    ; return to shader part epilog
5548;
5549; GFX10-LABEL: v_fshr_i128_svs:
5550; GFX10:       ; %bb.0:
5551; GFX10-NEXT:    s_movk_i32 s6, 0x7f
5552; GFX10-NEXT:    s_mov_b32 s7, 0
5553; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5554; GFX10-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5555; GFX10-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5556; GFX10-NEXT:    s_lshr_b32 s6, s1, 31
5557; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
5558; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
5559; GFX10-NEXT:    s_sub_i32 s9, s4, 64
5560; GFX10-NEXT:    s_sub_i32 s5, 64, s4
5561; GFX10-NEXT:    s_cmp_lt_u32 s4, 64
5562; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s8, v[0:1]
5563; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
5564; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
5565; GFX10-NEXT:    s_cselect_b32 s13, 1, 0
5566; GFX10-NEXT:    s_lshr_b64 s[6:7], s[0:1], s5
5567; GFX10-NEXT:    s_lshl_b64 s[10:11], s[2:3], s4
5568; GFX10-NEXT:    s_lshl_b64 s[4:5], s[0:1], s4
5569; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
5570; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s9
5571; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
5572; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], 0
5573; GFX10-NEXT:    s_cselect_b64 s[0:1], s[6:7], s[0:1]
5574; GFX10-NEXT:    s_cmp_lg_u32 s13, 0
5575; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
5576; GFX10-NEXT:    s_sub_i32 s0, 64, s8
5577; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s0, v[2:3]
5578; GFX10-NEXT:    s_sub_i32 s0, s8, 64
5579; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
5580; GFX10-NEXT:    v_lshrrev_b64 v[8:9], s0, v[2:3]
5581; GFX10-NEXT:    s_cselect_b32 vcc_lo, 1, 0
5582; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
5583; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
5584; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
5585; GFX10-NEXT:    s_cselect_b32 s0, 1, 0
5586; GFX10-NEXT:    s_and_b32 s1, 1, vcc_lo
5587; GFX10-NEXT:    s_and_b32 s0, 1, s0
5588; GFX10-NEXT:    v_lshrrev_b64 v[2:3], s8, v[2:3]
5589; GFX10-NEXT:    v_cndmask_b32_e32 v4, v8, v4, vcc_lo
5590; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v5, vcc_lo
5591; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
5592; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, s1
5593; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
5594; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
5595; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s0
5596; GFX10-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s0
5597; GFX10-NEXT:    v_or_b32_e32 v0, s4, v0
5598; GFX10-NEXT:    v_or_b32_e32 v1, s5, v1
5599; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
5600; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
5601; GFX10-NEXT:    ; return to shader part epilog
5602  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5603  %cast.result = bitcast i128 %result to <4 x float>
5604  ret <4 x float> %cast.result
5605}
5606
; Funnel-shift-right of i128 in an amdgpu_ps function where %rhs and %amt are
; marked inreg and %lhs is a plain argument.
; In the expected code below, %lhs is read from v[0:3], %rhs from s[0:3], and
; the shift amount comes from s[4:5] masked with 0x7f (only the low dword of
; the masked value is used afterwards).
; The i128 result is bitcast to <4 x float>; each expected sequence finishes by
; writing the four result dwords to v0-v3.
; NOTE(review): the "vss" suffix presumably names the operand placement
; (vector, scalar, scalar) -- confirm against the sibling tests in this file.
; The assertions are autogenerated by utils/update_llc_test_checks.py (see the
; note on the first line of the file).
5607define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
5608; GFX6-LABEL: v_fshr_i128_vss:
5609; GFX6:       ; %bb.0:
5610; GFX6-NEXT:    s_mov_b64 s[6:7], 0x7f
5611; GFX6-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5612; GFX6-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5613; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
5614; GFX6-NEXT:    s_sub_i32 s5, s4, 64
5615; GFX6-NEXT:    s_sub_i32 s6, 64, s4
5616; GFX6-NEXT:    v_lshl_b64 v[4:5], v[0:1], 1
5617; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5618; GFX6-NEXT:    s_cmp_lt_u32 s4, 64
5619; GFX6-NEXT:    v_or_b32_e32 v2, v2, v0
5620; GFX6-NEXT:    s_cselect_b32 s7, 1, 0
5621; GFX6-NEXT:    s_cmp_eq_u32 s4, 0
5622; GFX6-NEXT:    s_cselect_b32 s9, 1, 0
5623; GFX6-NEXT:    v_lshr_b64 v[0:1], v[4:5], s6
5624; GFX6-NEXT:    v_lshl_b64 v[6:7], v[2:3], s4
5625; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], s4
5626; GFX6-NEXT:    s_and_b32 s4, 1, s7
5627; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5628; GFX6-NEXT:    s_and_b32 s4, 1, s9
5629; GFX6-NEXT:    s_sub_i32 s10, s8, 64
5630; GFX6-NEXT:    s_sub_i32 s9, 64, s8
5631; GFX6-NEXT:    v_or_b32_e32 v6, v0, v6
5632; GFX6-NEXT:    v_or_b32_e32 v7, v1, v7
5633; GFX6-NEXT:    v_lshl_b64 v[0:1], v[4:5], s5
5634; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
5635; GFX6-NEXT:    s_cselect_b32 s11, 1, 0
5636; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
5637; GFX6-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
5638; GFX6-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
5639; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
5640; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5641; GFX6-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5642; GFX6-NEXT:    s_cselect_b32 s12, 1, 0
5643; GFX6-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
5644; GFX6-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
5645; GFX6-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
5646; GFX6-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5647; GFX6-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5648; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
5649; GFX6-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
5650; GFX6-NEXT:    s_cmp_lg_u32 s12, 0
5651; GFX6-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5652; GFX6-NEXT:    s_cmp_lg_u32 s11, 0
5653; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
5654; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
5655; GFX6-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
5656; GFX6-NEXT:    v_or_b32_e32 v0, s0, v4
5657; GFX6-NEXT:    v_or_b32_e32 v1, s1, v5
5658; GFX6-NEXT:    v_or_b32_e32 v2, s2, v2
5659; GFX6-NEXT:    v_or_b32_e32 v3, s3, v3
5660; GFX6-NEXT:    ; return to shader part epilog
5661;
5662; GFX8-LABEL: v_fshr_i128_vss:
5663; GFX8:       ; %bb.0:
5664; GFX8-NEXT:    s_mov_b64 s[6:7], 0x7f
5665; GFX8-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5666; GFX8-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5667; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5668; GFX8-NEXT:    s_sub_i32 s5, s4, 64
5669; GFX8-NEXT:    s_sub_i32 s6, 64, s4
5670; GFX8-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
5671; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5672; GFX8-NEXT:    s_cmp_lt_u32 s4, 64
5673; GFX8-NEXT:    v_or_b32_e32 v2, v2, v0
5674; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
5675; GFX8-NEXT:    s_cmp_eq_u32 s4, 0
5676; GFX8-NEXT:    s_cselect_b32 s9, 1, 0
5677; GFX8-NEXT:    v_lshrrev_b64 v[0:1], s6, v[4:5]
5678; GFX8-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
5679; GFX8-NEXT:    v_lshlrev_b64 v[8:9], s4, v[4:5]
5680; GFX8-NEXT:    s_and_b32 s4, 1, s7
5681; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5682; GFX8-NEXT:    s_and_b32 s4, 1, s9
5683; GFX8-NEXT:    s_sub_i32 s10, s8, 64
5684; GFX8-NEXT:    s_sub_i32 s9, 64, s8
5685; GFX8-NEXT:    v_or_b32_e32 v6, v0, v6
5686; GFX8-NEXT:    v_or_b32_e32 v7, v1, v7
5687; GFX8-NEXT:    v_lshlrev_b64 v[0:1], s5, v[4:5]
5688; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
5689; GFX8-NEXT:    s_cselect_b32 s11, 1, 0
5690; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
5691; GFX8-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
5692; GFX8-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
5693; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
5694; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5695; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5696; GFX8-NEXT:    s_cselect_b32 s12, 1, 0
5697; GFX8-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
5698; GFX8-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
5699; GFX8-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
5700; GFX8-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5701; GFX8-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5702; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
5703; GFX8-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
5704; GFX8-NEXT:    s_cmp_lg_u32 s12, 0
5705; GFX8-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5706; GFX8-NEXT:    s_cmp_lg_u32 s11, 0
5707; GFX8-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
5708; GFX8-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
5709; GFX8-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
5710; GFX8-NEXT:    v_or_b32_e32 v0, s0, v4
5711; GFX8-NEXT:    v_or_b32_e32 v1, s1, v5
5712; GFX8-NEXT:    v_or_b32_e32 v2, s2, v2
5713; GFX8-NEXT:    v_or_b32_e32 v3, s3, v3
5714; GFX8-NEXT:    ; return to shader part epilog
5715;
5716; GFX9-LABEL: v_fshr_i128_vss:
5717; GFX9:       ; %bb.0:
5718; GFX9-NEXT:    s_mov_b64 s[6:7], 0x7f
5719; GFX9-NEXT:    s_and_b64 s[8:9], s[4:5], s[6:7]
5720; GFX9-NEXT:    s_andn2_b64 s[4:5], s[6:7], s[4:5]
5721; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5722; GFX9-NEXT:    s_sub_i32 s5, s4, 64
5723; GFX9-NEXT:    s_sub_i32 s6, 64, s4
5724; GFX9-NEXT:    v_lshlrev_b64 v[4:5], 1, v[0:1]
5725; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
5726; GFX9-NEXT:    s_cmp_lt_u32 s4, 64
5727; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
5728; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
5729; GFX9-NEXT:    s_cmp_eq_u32 s4, 0
5730; GFX9-NEXT:    s_cselect_b32 s9, 1, 0
5731; GFX9-NEXT:    v_lshrrev_b64 v[0:1], s6, v[4:5]
5732; GFX9-NEXT:    v_lshlrev_b64 v[6:7], s4, v[2:3]
5733; GFX9-NEXT:    v_lshlrev_b64 v[8:9], s4, v[4:5]
5734; GFX9-NEXT:    s_and_b32 s4, 1, s7
5735; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5736; GFX9-NEXT:    s_and_b32 s4, 1, s9
5737; GFX9-NEXT:    s_sub_i32 s10, s8, 64
5738; GFX9-NEXT:    s_sub_i32 s9, 64, s8
5739; GFX9-NEXT:    v_or_b32_e32 v6, v0, v6
5740; GFX9-NEXT:    v_or_b32_e32 v7, v1, v7
5741; GFX9-NEXT:    v_lshlrev_b64 v[0:1], s5, v[4:5]
5742; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
5743; GFX9-NEXT:    s_cselect_b32 s11, 1, 0
5744; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
5745; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v8, vcc
5746; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v9, vcc
5747; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
5748; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
5749; GFX9-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
5750; GFX9-NEXT:    s_cselect_b32 s12, 1, 0
5751; GFX9-NEXT:    s_lshr_b64 s[4:5], s[2:3], s8
5752; GFX9-NEXT:    s_lshr_b64 s[6:7], s[0:1], s8
5753; GFX9-NEXT:    s_lshl_b64 s[8:9], s[2:3], s9
5754; GFX9-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
5755; GFX9-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5756; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
5757; GFX9-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[2:3]
5758; GFX9-NEXT:    s_cmp_lg_u32 s12, 0
5759; GFX9-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5760; GFX9-NEXT:    s_cmp_lg_u32 s11, 0
5761; GFX9-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
5762; GFX9-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
5763; GFX9-NEXT:    s_cselect_b64 s[2:3], s[4:5], 0
5764; GFX9-NEXT:    v_or_b32_e32 v0, s0, v4
5765; GFX9-NEXT:    v_or_b32_e32 v1, s1, v5
5766; GFX9-NEXT:    v_or_b32_e32 v2, s2, v2
5767; GFX9-NEXT:    v_or_b32_e32 v3, s3, v3
5768; GFX9-NEXT:    ; return to shader part epilog
5769;
5770; GFX10-LABEL: v_fshr_i128_vss:
5771; GFX10:       ; %bb.0:
5772; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
5773; GFX10-NEXT:    v_lshrrev_b32_e32 v4, 31, v1
5774; GFX10-NEXT:    s_mov_b64 s[6:7], 0x7f
5775; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
5776; GFX10-NEXT:    s_andn2_b64 s[8:9], s[6:7], s[4:5]
5777; GFX10-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
5778; GFX10-NEXT:    v_or_b32_e32 v2, v2, v4
5779; GFX10-NEXT:    s_sub_i32 s4, 64, s8
5780; GFX10-NEXT:    s_sub_i32 s5, s8, 64
5781; GFX10-NEXT:    v_lshrrev_b64 v[4:5], s4, v[0:1]
5782; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
5783; GFX10-NEXT:    v_lshlrev_b64 v[6:7], s8, v[2:3]
5784; GFX10-NEXT:    s_cselect_b32 vcc_lo, 1, 0
5785; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
5786; GFX10-NEXT:    v_lshlrev_b64 v[8:9], s8, v[0:1]
5787; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
5788; GFX10-NEXT:    s_and_b32 s4, 1, vcc_lo
5789; GFX10-NEXT:    v_lshlrev_b64 v[0:1], s5, v[0:1]
5790; GFX10-NEXT:    v_or_b32_e32 v4, v4, v6
5791; GFX10-NEXT:    v_or_b32_e32 v5, v5, v7
5792; GFX10-NEXT:    v_cmp_ne_u32_e64 s4, 0, s4
5793; GFX10-NEXT:    s_sub_i32 s10, s6, 64
5794; GFX10-NEXT:    v_cndmask_b32_e32 v6, 0, v8, vcc_lo
5795; GFX10-NEXT:    v_cndmask_b32_e32 v7, 0, v9, vcc_lo
5796; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s4
5797; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v5, s4
5798; GFX10-NEXT:    s_and_b32 s4, 1, s7
5799; GFX10-NEXT:    s_sub_i32 s7, 64, s6
5800; GFX10-NEXT:    s_cmp_lt_u32 s6, 64
5801; GFX10-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s4
5802; GFX10-NEXT:    s_cselect_b32 s11, 1, 0
5803; GFX10-NEXT:    s_cmp_eq_u32 s6, 0
5804; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
5805; GFX10-NEXT:    s_lshr_b64 s[4:5], s[0:1], s6
5806; GFX10-NEXT:    s_lshl_b64 s[8:9], s[2:3], s7
5807; GFX10-NEXT:    s_lshr_b64 s[6:7], s[2:3], s6
5808; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
5809; GFX10-NEXT:    s_lshr_b64 s[2:3], s[2:3], s10
5810; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
5811; GFX10-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc_lo
5812; GFX10-NEXT:    s_cselect_b64 s[2:3], s[4:5], s[2:3]
5813; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
5814; GFX10-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc_lo
5815; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
5816; GFX10-NEXT:    s_cmp_lg_u32 s11, 0
5817; GFX10-NEXT:    v_or_b32_e32 v0, s0, v6
5818; GFX10-NEXT:    s_cselect_b64 s[2:3], s[6:7], 0
5819; GFX10-NEXT:    v_or_b32_e32 v1, s1, v7
5820; GFX10-NEXT:    v_or_b32_e32 v2, s2, v2
5821; GFX10-NEXT:    v_or_b32_e32 v3, s3, v3
5822; GFX10-NEXT:    ; return to shader part epilog
5823  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
5824  %cast.result = bitcast i128 %result to <4 x float>
5825  ret <4 x float> %cast.result
5826}
5827
; Scalar i128 funnel shift right (llvm.fshr.i128) by the constant amount 65.
; Both operands are passed `inreg` to an amdgpu_ps entry point, and every
; expected sequence below consists solely of s_* instructions.
; Because the amount is a compile-time constant, the expected code contains
; only fixed shifts (by 1 and by 31) and ORs -- no compares or selects.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5828define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
5829; GFX6-LABEL: s_fshr_i128_65:
5830; GFX6:       ; %bb.0:
5831; GFX6-NEXT:    s_mov_b32 s4, 0
5832; GFX6-NEXT:    s_lshl_b32 s5, s0, 31
5833; GFX6-NEXT:    s_lshl_b32 s3, s2, 31
5834; GFX6-NEXT:    s_mov_b32 s2, s4
5835; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5836; GFX6-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
5837; GFX6-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
5838; GFX6-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
5839; GFX6-NEXT:    ; return to shader part epilog
5840;
5841; GFX8-LABEL: s_fshr_i128_65:
5842; GFX8:       ; %bb.0:
5843; GFX8-NEXT:    s_mov_b32 s4, 0
5844; GFX8-NEXT:    s_lshl_b32 s5, s0, 31
5845; GFX8-NEXT:    s_lshl_b32 s3, s2, 31
5846; GFX8-NEXT:    s_mov_b32 s2, s4
5847; GFX8-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5848; GFX8-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
5849; GFX8-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
5850; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
5851; GFX8-NEXT:    ; return to shader part epilog
5852;
5853; GFX9-LABEL: s_fshr_i128_65:
5854; GFX9:       ; %bb.0:
5855; GFX9-NEXT:    s_mov_b32 s4, 0
5856; GFX9-NEXT:    s_lshl_b32 s5, s0, 31
5857; GFX9-NEXT:    s_lshl_b32 s3, s2, 31
5858; GFX9-NEXT:    s_mov_b32 s2, s4
5859; GFX9-NEXT:    s_lshr_b64 s[0:1], s[0:1], 1
5860; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], s[0:1]
5861; GFX9-NEXT:    s_lshr_b64 s[0:1], s[6:7], 1
5862; GFX9-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
5863; GFX9-NEXT:    ; return to shader part epilog
5864;
5865; GFX10-LABEL: s_fshr_i128_65:
5866; GFX10:       ; %bb.0:
5867; GFX10-NEXT:    s_mov_b32 s4, 0
5868; GFX10-NEXT:    s_lshl_b32 s5, s0, 31
5869; GFX10-NEXT:    s_lshl_b32 s3, s2, 31
5870; GFX10-NEXT:    s_mov_b32 s2, s4
5871; GFX10-NEXT:    s_lshr_b64 s[6:7], s[6:7], 1
5872; GFX10-NEXT:    s_lshr_b64 s[8:9], s[0:1], 1
5873; GFX10-NEXT:    s_or_b64 s[0:1], s[4:5], s[6:7]
5874; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
5875; GFX10-NEXT:    ; return to shader part epilog
  ; IR under test: a single funnel-shift intrinsic call with the literal
  ; amount 65; its result is returned unchanged.
5876  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
5877  ret i128 %result
5878}
5879
; i128 funnel shift right (llvm.fshr.i128) by the constant amount 65, in a
; function with no explicit calling convention and no `inreg` arguments.
; In every expected sequence below the arithmetic is done with v_*
; instructions -- fixed shifts by 1 and by 31 plus ORs -- and the function
; returns with s_setpc_b64 s[30:31].
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5880define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) {
5881; GFX6-LABEL: v_fshr_i128_65:
5882; GFX6:       ; %bb.0:
5883; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5884; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
5885; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
5886; GFX6-NEXT:    v_lshr_b64 v[2:3], v[0:1], 1
5887; GFX6-NEXT:    v_lshr_b64 v[0:1], v[6:7], 1
5888; GFX6-NEXT:    v_or_b32_e32 v3, v5, v3
5889; GFX6-NEXT:    v_or_b32_e32 v1, v4, v1
5890; GFX6-NEXT:    s_setpc_b64 s[30:31]
5891;
5892; GFX8-LABEL: v_fshr_i128_65:
5893; GFX8:       ; %bb.0:
5894; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5895; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
5896; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
5897; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
5898; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
5899; GFX8-NEXT:    v_or_b32_e32 v3, v5, v3
5900; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
5901; GFX8-NEXT:    s_setpc_b64 s[30:31]
5902;
5903; GFX9-LABEL: v_fshr_i128_65:
5904; GFX9:       ; %bb.0:
5905; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5906; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 31, v0
5907; GFX9-NEXT:    v_lshlrev_b32_e32 v5, 31, v2
5908; GFX9-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
5909; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 1, v[6:7]
5910; GFX9-NEXT:    v_or_b32_e32 v3, v5, v3
5911; GFX9-NEXT:    v_or_b32_e32 v1, v4, v1
5912; GFX9-NEXT:    s_setpc_b64 s[30:31]
5913;
5914; GFX10-LABEL: v_fshr_i128_65:
5915; GFX10:       ; %bb.0:
5916; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5917; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
5918; GFX10-NEXT:    v_mov_b32_e32 v8, v2
5919; GFX10-NEXT:    v_lshrrev_b64 v[4:5], 1, v[6:7]
5920; GFX10-NEXT:    v_lshrrev_b64 v[2:3], 1, v[0:1]
5921; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 31, v0
5922; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 31, v8
5923; GFX10-NEXT:    v_or_b32_e32 v1, v9, v5
5924; GFX10-NEXT:    v_or_b32_e32 v3, v0, v3
5925; GFX10-NEXT:    v_mov_b32_e32 v0, v4
5926; GFX10-NEXT:    s_setpc_b64 s[30:31]
  ; IR under test: the same constant-65 funnel shift as @s_fshr_i128_65, but
  ; with ordinary (non-inreg) arguments.
5927  %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
5928  ret i128 %result
5929}
5930
; Vector funnel shift right (llvm.fshr.v2i128) on <2 x i128> operands with a
; variable <2 x i128> shift amount. All three arguments are passed `inreg` to
; an amdgpu_ps entry point, and every expected sequence below consists solely
; of s_* instructions.
; Each expected sequence materializes the constant 0x7f, applies it with
; s_and_b64 / s_andn2_b64, and then builds the result from 64-bit scalar
; shifts combined through s_cmp_lt_u32 / s_cmp_eq_u32 (against 64 and 0) and
; s_cselect_b64. This pattern appears twice per sequence.
; NOTE(review): presumably one repetition per vector element -- confirm.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
5931define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
5932; GFX6-LABEL: s_fshr_v2i128:
5933; GFX6:       ; %bb.0:
5934; GFX6-NEXT:    s_movk_i32 s18, 0x7f
5935; GFX6-NEXT:    s_mov_b32 s19, 0
5936; GFX6-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
5937; GFX6-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
5938; GFX6-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
5939; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
5940; GFX6-NEXT:    s_lshr_b32 s0, s1, 31
5941; GFX6-NEXT:    s_mov_b32 s1, s19
5942; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
5943; GFX6-NEXT:    s_sub_i32 s23, s16, 64
5944; GFX6-NEXT:    s_sub_i32 s17, 64, s16
5945; GFX6-NEXT:    s_cmp_lt_u32 s16, 64
5946; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
5947; GFX6-NEXT:    s_cmp_eq_u32 s16, 0
5948; GFX6-NEXT:    s_cselect_b32 s29, 1, 0
5949; GFX6-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
5950; GFX6-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
5951; GFX6-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
5952; GFX6-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
5953; GFX6-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
5954; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
5955; GFX6-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
5956; GFX6-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
5957; GFX6-NEXT:    s_cmp_lg_u32 s29, 0
5958; GFX6-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
5959; GFX6-NEXT:    s_sub_i32 s26, s22, 64
5960; GFX6-NEXT:    s_sub_i32 s24, 64, s22
5961; GFX6-NEXT:    s_cmp_lt_u32 s22, 64
5962; GFX6-NEXT:    s_cselect_b32 s27, 1, 0
5963; GFX6-NEXT:    s_cmp_eq_u32 s22, 0
5964; GFX6-NEXT:    s_cselect_b32 s28, 1, 0
5965; GFX6-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
5966; GFX6-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
5967; GFX6-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
5968; GFX6-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
5969; GFX6-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
5970; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
5971; GFX6-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
5972; GFX6-NEXT:    s_cmp_lg_u32 s28, 0
5973; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
5974; GFX6-NEXT:    s_cmp_lg_u32 s27, 0
5975; GFX6-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
5976; GFX6-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
5977; GFX6-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
5978; GFX6-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
5979; GFX6-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
5980; GFX6-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
5981; GFX6-NEXT:    s_lshr_b32 s18, s5, 31
5982; GFX6-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
5983; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
5984; GFX6-NEXT:    s_sub_i32 s9, s10, 64
5985; GFX6-NEXT:    s_sub_i32 s11, 64, s10
5986; GFX6-NEXT:    s_cmp_lt_u32 s10, 64
5987; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
5988; GFX6-NEXT:    s_cmp_eq_u32 s10, 0
5989; GFX6-NEXT:    s_cselect_b32 s21, 1, 0
5990; GFX6-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
5991; GFX6-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
5992; GFX6-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
5993; GFX6-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
5994; GFX6-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
5995; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
5996; GFX6-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
5997; GFX6-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
5998; GFX6-NEXT:    s_cmp_lg_u32 s21, 0
5999; GFX6-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
6000; GFX6-NEXT:    s_sub_i32 s18, s8, 64
6001; GFX6-NEXT:    s_sub_i32 s16, 64, s8
6002; GFX6-NEXT:    s_cmp_lt_u32 s8, 64
6003; GFX6-NEXT:    s_cselect_b32 s19, 1, 0
6004; GFX6-NEXT:    s_cmp_eq_u32 s8, 0
6005; GFX6-NEXT:    s_cselect_b32 s20, 1, 0
6006; GFX6-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
6007; GFX6-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
6008; GFX6-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
6009; GFX6-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
6010; GFX6-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
6011; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
6012; GFX6-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
6013; GFX6-NEXT:    s_cmp_lg_u32 s20, 0
6014; GFX6-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
6015; GFX6-NEXT:    s_cmp_lg_u32 s19, 0
6016; GFX6-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
6017; GFX6-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
6018; GFX6-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
6019; GFX6-NEXT:    ; return to shader part epilog
6020;
6021; GFX8-LABEL: s_fshr_v2i128:
6022; GFX8:       ; %bb.0:
6023; GFX8-NEXT:    s_movk_i32 s18, 0x7f
6024; GFX8-NEXT:    s_mov_b32 s19, 0
6025; GFX8-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
6026; GFX8-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
6027; GFX8-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
6028; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6029; GFX8-NEXT:    s_lshr_b32 s0, s1, 31
6030; GFX8-NEXT:    s_mov_b32 s1, s19
6031; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6032; GFX8-NEXT:    s_sub_i32 s23, s16, 64
6033; GFX8-NEXT:    s_sub_i32 s17, 64, s16
6034; GFX8-NEXT:    s_cmp_lt_u32 s16, 64
6035; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
6036; GFX8-NEXT:    s_cmp_eq_u32 s16, 0
6037; GFX8-NEXT:    s_cselect_b32 s29, 1, 0
6038; GFX8-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
6039; GFX8-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
6040; GFX8-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
6041; GFX8-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
6042; GFX8-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
6043; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
6044; GFX8-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6045; GFX8-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
6046; GFX8-NEXT:    s_cmp_lg_u32 s29, 0
6047; GFX8-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
6048; GFX8-NEXT:    s_sub_i32 s26, s22, 64
6049; GFX8-NEXT:    s_sub_i32 s24, 64, s22
6050; GFX8-NEXT:    s_cmp_lt_u32 s22, 64
6051; GFX8-NEXT:    s_cselect_b32 s27, 1, 0
6052; GFX8-NEXT:    s_cmp_eq_u32 s22, 0
6053; GFX8-NEXT:    s_cselect_b32 s28, 1, 0
6054; GFX8-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
6055; GFX8-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
6056; GFX8-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
6057; GFX8-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
6058; GFX8-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
6059; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
6060; GFX8-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
6061; GFX8-NEXT:    s_cmp_lg_u32 s28, 0
6062; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
6063; GFX8-NEXT:    s_cmp_lg_u32 s27, 0
6064; GFX8-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
6065; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
6066; GFX8-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
6067; GFX8-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
6068; GFX8-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
6069; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
6070; GFX8-NEXT:    s_lshr_b32 s18, s5, 31
6071; GFX8-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
6072; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
6073; GFX8-NEXT:    s_sub_i32 s9, s10, 64
6074; GFX8-NEXT:    s_sub_i32 s11, 64, s10
6075; GFX8-NEXT:    s_cmp_lt_u32 s10, 64
6076; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
6077; GFX8-NEXT:    s_cmp_eq_u32 s10, 0
6078; GFX8-NEXT:    s_cselect_b32 s21, 1, 0
6079; GFX8-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
6080; GFX8-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
6081; GFX8-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
6082; GFX8-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
6083; GFX8-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
6084; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
6085; GFX8-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
6086; GFX8-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
6087; GFX8-NEXT:    s_cmp_lg_u32 s21, 0
6088; GFX8-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
6089; GFX8-NEXT:    s_sub_i32 s18, s8, 64
6090; GFX8-NEXT:    s_sub_i32 s16, 64, s8
6091; GFX8-NEXT:    s_cmp_lt_u32 s8, 64
6092; GFX8-NEXT:    s_cselect_b32 s19, 1, 0
6093; GFX8-NEXT:    s_cmp_eq_u32 s8, 0
6094; GFX8-NEXT:    s_cselect_b32 s20, 1, 0
6095; GFX8-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
6096; GFX8-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
6097; GFX8-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
6098; GFX8-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
6099; GFX8-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
6100; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
6101; GFX8-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
6102; GFX8-NEXT:    s_cmp_lg_u32 s20, 0
6103; GFX8-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
6104; GFX8-NEXT:    s_cmp_lg_u32 s19, 0
6105; GFX8-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
6106; GFX8-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
6107; GFX8-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
6108; GFX8-NEXT:    ; return to shader part epilog
6109;
6110; GFX9-LABEL: s_fshr_v2i128:
6111; GFX9:       ; %bb.0:
6112; GFX9-NEXT:    s_movk_i32 s18, 0x7f
6113; GFX9-NEXT:    s_mov_b32 s19, 0
6114; GFX9-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
6115; GFX9-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
6116; GFX9-NEXT:    s_lshl_b64 s[24:25], s[0:1], 1
6117; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6118; GFX9-NEXT:    s_lshr_b32 s0, s1, 31
6119; GFX9-NEXT:    s_mov_b32 s1, s19
6120; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
6121; GFX9-NEXT:    s_sub_i32 s23, s16, 64
6122; GFX9-NEXT:    s_sub_i32 s17, 64, s16
6123; GFX9-NEXT:    s_cmp_lt_u32 s16, 64
6124; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
6125; GFX9-NEXT:    s_cmp_eq_u32 s16, 0
6126; GFX9-NEXT:    s_cselect_b32 s29, 1, 0
6127; GFX9-NEXT:    s_lshl_b64 s[2:3], s[24:25], s16
6128; GFX9-NEXT:    s_lshr_b64 s[26:27], s[24:25], s17
6129; GFX9-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
6130; GFX9-NEXT:    s_or_b64 s[16:17], s[26:27], s[16:17]
6131; GFX9-NEXT:    s_lshl_b64 s[24:25], s[24:25], s23
6132; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
6133; GFX9-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
6134; GFX9-NEXT:    s_cselect_b64 s[16:17], s[16:17], s[24:25]
6135; GFX9-NEXT:    s_cmp_lg_u32 s29, 0
6136; GFX9-NEXT:    s_cselect_b64 s[16:17], s[0:1], s[16:17]
6137; GFX9-NEXT:    s_sub_i32 s26, s22, 64
6138; GFX9-NEXT:    s_sub_i32 s24, 64, s22
6139; GFX9-NEXT:    s_cmp_lt_u32 s22, 64
6140; GFX9-NEXT:    s_cselect_b32 s27, 1, 0
6141; GFX9-NEXT:    s_cmp_eq_u32 s22, 0
6142; GFX9-NEXT:    s_cselect_b32 s28, 1, 0
6143; GFX9-NEXT:    s_lshr_b64 s[0:1], s[10:11], s22
6144; GFX9-NEXT:    s_lshr_b64 s[22:23], s[8:9], s22
6145; GFX9-NEXT:    s_lshl_b64 s[24:25], s[10:11], s24
6146; GFX9-NEXT:    s_or_b64 s[22:23], s[22:23], s[24:25]
6147; GFX9-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
6148; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
6149; GFX9-NEXT:    s_cselect_b64 s[10:11], s[22:23], s[10:11]
6150; GFX9-NEXT:    s_cmp_lg_u32 s28, 0
6151; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[10:11]
6152; GFX9-NEXT:    s_cmp_lg_u32 s27, 0
6153; GFX9-NEXT:    s_cselect_b64 s[10:11], s[0:1], 0
6154; GFX9-NEXT:    s_or_b64 s[0:1], s[2:3], s[8:9]
6155; GFX9-NEXT:    s_or_b64 s[2:3], s[16:17], s[10:11]
6156; GFX9-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
6157; GFX9-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
6158; GFX9-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
6159; GFX9-NEXT:    s_lshr_b32 s18, s5, 31
6160; GFX9-NEXT:    s_lshl_b64 s[16:17], s[4:5], 1
6161; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[18:19]
6162; GFX9-NEXT:    s_sub_i32 s9, s10, 64
6163; GFX9-NEXT:    s_sub_i32 s11, 64, s10
6164; GFX9-NEXT:    s_cmp_lt_u32 s10, 64
6165; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
6166; GFX9-NEXT:    s_cmp_eq_u32 s10, 0
6167; GFX9-NEXT:    s_cselect_b32 s21, 1, 0
6168; GFX9-NEXT:    s_lshl_b64 s[6:7], s[16:17], s10
6169; GFX9-NEXT:    s_lshr_b64 s[18:19], s[16:17], s11
6170; GFX9-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
6171; GFX9-NEXT:    s_or_b64 s[10:11], s[18:19], s[10:11]
6172; GFX9-NEXT:    s_lshl_b64 s[16:17], s[16:17], s9
6173; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
6174; GFX9-NEXT:    s_cselect_b64 s[6:7], s[6:7], 0
6175; GFX9-NEXT:    s_cselect_b64 s[10:11], s[10:11], s[16:17]
6176; GFX9-NEXT:    s_cmp_lg_u32 s21, 0
6177; GFX9-NEXT:    s_cselect_b64 s[10:11], s[4:5], s[10:11]
6178; GFX9-NEXT:    s_sub_i32 s18, s8, 64
6179; GFX9-NEXT:    s_sub_i32 s16, 64, s8
6180; GFX9-NEXT:    s_cmp_lt_u32 s8, 64
6181; GFX9-NEXT:    s_cselect_b32 s19, 1, 0
6182; GFX9-NEXT:    s_cmp_eq_u32 s8, 0
6183; GFX9-NEXT:    s_cselect_b32 s20, 1, 0
6184; GFX9-NEXT:    s_lshr_b64 s[4:5], s[14:15], s8
6185; GFX9-NEXT:    s_lshr_b64 s[8:9], s[12:13], s8
6186; GFX9-NEXT:    s_lshl_b64 s[16:17], s[14:15], s16
6187; GFX9-NEXT:    s_or_b64 s[8:9], s[8:9], s[16:17]
6188; GFX9-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
6189; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
6190; GFX9-NEXT:    s_cselect_b64 s[8:9], s[8:9], s[14:15]
6191; GFX9-NEXT:    s_cmp_lg_u32 s20, 0
6192; GFX9-NEXT:    s_cselect_b64 s[8:9], s[12:13], s[8:9]
6193; GFX9-NEXT:    s_cmp_lg_u32 s19, 0
6194; GFX9-NEXT:    s_cselect_b64 s[12:13], s[4:5], 0
6195; GFX9-NEXT:    s_or_b64 s[4:5], s[6:7], s[8:9]
6196; GFX9-NEXT:    s_or_b64 s[6:7], s[10:11], s[12:13]
6197; GFX9-NEXT:    ; return to shader part epilog
6198;
6199; GFX10-LABEL: s_fshr_v2i128:
6200; GFX10:       ; %bb.0:
6201; GFX10-NEXT:    s_movk_i32 s18, 0x7f
6202; GFX10-NEXT:    s_mov_b32 s19, 0
6203; GFX10-NEXT:    s_lshl_b64 s[2:3], s[2:3], 1
6204; GFX10-NEXT:    s_and_b64 s[22:23], s[16:17], s[18:19]
6205; GFX10-NEXT:    s_andn2_b64 s[16:17], s[18:19], s[16:17]
6206; GFX10-NEXT:    s_lshr_b32 s24, s1, 31
6207; GFX10-NEXT:    s_mov_b32 s25, s19
6208; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], 1
6209; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[24:25]
6210; GFX10-NEXT:    s_sub_i32 s23, s16, 64
6211; GFX10-NEXT:    s_sub_i32 s17, 64, s16
6212; GFX10-NEXT:    s_cmp_lt_u32 s16, 64
6213; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
6214; GFX10-NEXT:    s_cmp_eq_u32 s16, 0
6215; GFX10-NEXT:    s_cselect_b32 s29, 1, 0
6216; GFX10-NEXT:    s_lshr_b64 s[24:25], s[0:1], s17
6217; GFX10-NEXT:    s_lshl_b64 s[26:27], s[2:3], s16
6218; GFX10-NEXT:    s_lshl_b64 s[16:17], s[0:1], s16
6219; GFX10-NEXT:    s_or_b64 s[24:25], s[24:25], s[26:27]
6220; GFX10-NEXT:    s_lshl_b64 s[0:1], s[0:1], s23
6221; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
6222; GFX10-NEXT:    s_cselect_b64 s[16:17], s[16:17], 0
6223; GFX10-NEXT:    s_cselect_b64 s[0:1], s[24:25], s[0:1]
6224; GFX10-NEXT:    s_cmp_lg_u32 s29, 0
6225; GFX10-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[0:1]
6226; GFX10-NEXT:    s_sub_i32 s26, s22, 64
6227; GFX10-NEXT:    s_sub_i32 s23, 64, s22
6228; GFX10-NEXT:    s_cmp_lt_u32 s22, 64
6229; GFX10-NEXT:    s_cselect_b32 s27, 1, 0
6230; GFX10-NEXT:    s_cmp_eq_u32 s22, 0
6231; GFX10-NEXT:    s_cselect_b32 s28, 1, 0
6232; GFX10-NEXT:    s_lshr_b64 s[0:1], s[8:9], s22
6233; GFX10-NEXT:    s_lshl_b64 s[24:25], s[10:11], s23
6234; GFX10-NEXT:    s_lshr_b64 s[22:23], s[10:11], s22
6235; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[24:25]
6236; GFX10-NEXT:    s_lshr_b64 s[10:11], s[10:11], s26
6237; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
6238; GFX10-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[10:11]
6239; GFX10-NEXT:    s_cmp_lg_u32 s28, 0
6240; GFX10-NEXT:    s_cselect_b64 s[0:1], s[8:9], s[0:1]
6241; GFX10-NEXT:    s_cmp_lg_u32 s27, 0
6242; GFX10-NEXT:    s_cselect_b64 s[8:9], s[22:23], 0
6243; GFX10-NEXT:    s_andn2_b64 s[10:11], s[18:19], s[20:21]
6244; GFX10-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
6245; GFX10-NEXT:    s_and_b64 s[8:9], s[20:21], s[18:19]
6246; GFX10-NEXT:    s_lshl_b64 s[6:7], s[6:7], 1
6247; GFX10-NEXT:    s_lshr_b32 s18, s5, 31
6248; GFX10-NEXT:    s_or_b64 s[0:1], s[16:17], s[0:1]
6249; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], 1
6250; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[18:19]
6251; GFX10-NEXT:    s_sub_i32 s9, s10, 64
6252; GFX10-NEXT:    s_sub_i32 s11, 64, s10
6253; GFX10-NEXT:    s_cmp_lt_u32 s10, 64
6254; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
6255; GFX10-NEXT:    s_cmp_eq_u32 s10, 0
6256; GFX10-NEXT:    s_cselect_b32 s21, 1, 0
6257; GFX10-NEXT:    s_lshr_b64 s[16:17], s[4:5], s11
6258; GFX10-NEXT:    s_lshl_b64 s[18:19], s[6:7], s10
6259; GFX10-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
6260; GFX10-NEXT:    s_or_b64 s[16:17], s[16:17], s[18:19]
6261; GFX10-NEXT:    s_lshl_b64 s[4:5], s[4:5], s9
6262; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
6263; GFX10-NEXT:    s_cselect_b64 s[10:11], s[10:11], 0
6264; GFX10-NEXT:    s_cselect_b64 s[4:5], s[16:17], s[4:5]
6265; GFX10-NEXT:    s_cmp_lg_u32 s21, 0
6266; GFX10-NEXT:    s_cselect_b64 s[6:7], s[6:7], s[4:5]
6267; GFX10-NEXT:    s_sub_i32 s18, s8, 64
6268; GFX10-NEXT:    s_sub_i32 s9, 64, s8
6269; GFX10-NEXT:    s_cmp_lt_u32 s8, 64
6270; GFX10-NEXT:    s_cselect_b32 s19, 1, 0
6271; GFX10-NEXT:    s_cmp_eq_u32 s8, 0
6272; GFX10-NEXT:    s_cselect_b32 s20, 1, 0
6273; GFX10-NEXT:    s_lshr_b64 s[4:5], s[12:13], s8
6274; GFX10-NEXT:    s_lshl_b64 s[16:17], s[14:15], s9
6275; GFX10-NEXT:    s_lshr_b64 s[8:9], s[14:15], s8
6276; GFX10-NEXT:    s_or_b64 s[4:5], s[4:5], s[16:17]
6277; GFX10-NEXT:    s_lshr_b64 s[14:15], s[14:15], s18
6278; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
6279; GFX10-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[14:15]
6280; GFX10-NEXT:    s_cmp_lg_u32 s20, 0
6281; GFX10-NEXT:    s_cselect_b64 s[4:5], s[12:13], s[4:5]
6282; GFX10-NEXT:    s_cmp_lg_u32 s19, 0
6283; GFX10-NEXT:    s_cselect_b64 s[8:9], s[8:9], 0
6284; GFX10-NEXT:    s_or_b64 s[4:5], s[10:11], s[4:5]
6285; GFX10-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
6286; GFX10-NEXT:    ; return to shader part epilog
  ; IR under test: a single vector funnel-shift intrinsic call with a
  ; run-time amount; its result is returned unchanged.
6287  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
6288  ret <2 x i128> %result
6289}
6290
6291define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
6292; GFX6-LABEL: v_fshr_v2i128:
6293; GFX6:       ; %bb.0:
6294; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6295; GFX6-NEXT:    s_movk_i32 s6, 0x7f
6296; GFX6-NEXT:    v_xor_b32_e32 v17, -1, v16
6297; GFX6-NEXT:    v_lshl_b64 v[2:3], v[2:3], 1
6298; GFX6-NEXT:    v_and_b32_e32 v23, s6, v17
6299; GFX6-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6300; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
6301; GFX6-NEXT:    v_or_b32_e32 v2, v2, v17
6302; GFX6-NEXT:    v_sub_i32_e32 v17, vcc, 64, v23
6303; GFX6-NEXT:    v_lshr_b64 v[17:18], v[0:1], v17
6304; GFX6-NEXT:    v_lshl_b64 v[21:22], v[2:3], v23
6305; GFX6-NEXT:    v_and_b32_e32 v24, s6, v16
6306; GFX6-NEXT:    v_sub_i32_e32 v16, vcc, 64, v24
6307; GFX6-NEXT:    v_or_b32_e32 v21, v17, v21
6308; GFX6-NEXT:    v_or_b32_e32 v22, v18, v22
6309; GFX6-NEXT:    v_lshl_b64 v[16:17], v[10:11], v16
6310; GFX6-NEXT:    v_lshr_b64 v[18:19], v[8:9], v24
6311; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
6312; GFX6-NEXT:    v_or_b32_e32 v18, v18, v16
6313; GFX6-NEXT:    v_subrev_i32_e32 v16, vcc, 64, v23
6314; GFX6-NEXT:    v_or_b32_e32 v19, v19, v17
6315; GFX6-NEXT:    v_lshl_b64 v[16:17], v[0:1], v16
6316; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], v23
6317; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
6318; GFX6-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
6319; GFX6-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
6320; GFX6-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
6321; GFX6-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6322; GFX6-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6323; GFX6-NEXT:    v_subrev_i32_e64 v0, s[4:5], 64, v24
6324; GFX6-NEXT:    v_lshr_b64 v[2:3], v[10:11], v0
6325; GFX6-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
6326; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6327; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
6328; GFX6-NEXT:    v_lshr_b64 v[0:1], v[10:11], v24
6329; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
6330; GFX6-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6331; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
6332; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6333; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
6334; GFX6-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6335; GFX6-NEXT:    v_or_b32_e32 v0, v25, v2
6336; GFX6-NEXT:    v_or_b32_e32 v2, v17, v8
6337; GFX6-NEXT:    v_xor_b32_e32 v8, -1, v20
6338; GFX6-NEXT:    v_lshl_b64 v[6:7], v[6:7], 1
6339; GFX6-NEXT:    v_or_b32_e32 v1, v18, v3
6340; GFX6-NEXT:    v_or_b32_e32 v3, v16, v9
6341; GFX6-NEXT:    v_and_b32_e32 v17, s6, v8
6342; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], 1
6343; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
6344; GFX6-NEXT:    v_or_b32_e32 v6, v6, v4
6345; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, 64, v17
6346; GFX6-NEXT:    v_lshr_b64 v[4:5], v[8:9], v4
6347; GFX6-NEXT:    v_lshl_b64 v[10:11], v[6:7], v17
6348; GFX6-NEXT:    v_subrev_i32_e32 v18, vcc, 64, v17
6349; GFX6-NEXT:    v_or_b32_e32 v10, v4, v10
6350; GFX6-NEXT:    v_or_b32_e32 v11, v5, v11
6351; GFX6-NEXT:    v_lshl_b64 v[4:5], v[8:9], v17
6352; GFX6-NEXT:    v_lshl_b64 v[8:9], v[8:9], v18
6353; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
6354; GFX6-NEXT:    v_and_b32_e32 v16, s6, v20
6355; GFX6-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
6356; GFX6-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
6357; GFX6-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
6358; GFX6-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
6359; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
6360; GFX6-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
6361; GFX6-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
6362; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, 64, v16
6363; GFX6-NEXT:    v_lshr_b64 v[4:5], v[12:13], v16
6364; GFX6-NEXT:    v_lshl_b64 v[6:7], v[14:15], v6
6365; GFX6-NEXT:    v_subrev_i32_e32 v10, vcc, 64, v16
6366; GFX6-NEXT:    v_or_b32_e32 v11, v4, v6
6367; GFX6-NEXT:    v_or_b32_e32 v17, v5, v7
6368; GFX6-NEXT:    v_lshr_b64 v[6:7], v[14:15], v10
6369; GFX6-NEXT:    v_lshr_b64 v[4:5], v[14:15], v16
6370; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
6371; GFX6-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
6372; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
6373; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
6374; GFX6-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
6375; GFX6-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
6376; GFX6-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
6377; GFX6-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
6378; GFX6-NEXT:    v_or_b32_e32 v4, v18, v6
6379; GFX6-NEXT:    v_or_b32_e32 v5, v19, v7
6380; GFX6-NEXT:    v_or_b32_e32 v6, v8, v10
6381; GFX6-NEXT:    v_or_b32_e32 v7, v9, v11
6382; GFX6-NEXT:    s_setpc_b64 s[30:31]
6383;
6384; GFX8-LABEL: v_fshr_v2i128:
6385; GFX8:       ; %bb.0:
6386; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6387; GFX8-NEXT:    s_movk_i32 s6, 0x7f
6388; GFX8-NEXT:    v_xor_b32_e32 v17, -1, v16
6389; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6390; GFX8-NEXT:    v_and_b32_e32 v23, s6, v17
6391; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6392; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6393; GFX8-NEXT:    v_or_b32_e32 v2, v2, v17
6394; GFX8-NEXT:    v_sub_u32_e32 v17, vcc, 64, v23
6395; GFX8-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
6396; GFX8-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
6397; GFX8-NEXT:    v_and_b32_e32 v24, s6, v16
6398; GFX8-NEXT:    v_sub_u32_e32 v16, vcc, 64, v24
6399; GFX8-NEXT:    v_or_b32_e32 v21, v17, v21
6400; GFX8-NEXT:    v_or_b32_e32 v22, v18, v22
6401; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
6402; GFX8-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
6403; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
6404; GFX8-NEXT:    v_or_b32_e32 v18, v18, v16
6405; GFX8-NEXT:    v_subrev_u32_e32 v16, vcc, 64, v23
6406; GFX8-NEXT:    v_or_b32_e32 v19, v19, v17
6407; GFX8-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
6408; GFX8-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
6409; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
6410; GFX8-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
6411; GFX8-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
6412; GFX8-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
6413; GFX8-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6414; GFX8-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6415; GFX8-NEXT:    v_subrev_u32_e64 v0, s[4:5], 64, v24
6416; GFX8-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
6417; GFX8-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
6418; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6419; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
6420; GFX8-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
6421; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
6422; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6423; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
6424; GFX8-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6425; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
6426; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6427; GFX8-NEXT:    v_or_b32_e32 v0, v25, v2
6428; GFX8-NEXT:    v_or_b32_e32 v2, v17, v8
6429; GFX8-NEXT:    v_xor_b32_e32 v8, -1, v20
6430; GFX8-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
6431; GFX8-NEXT:    v_or_b32_e32 v1, v18, v3
6432; GFX8-NEXT:    v_or_b32_e32 v3, v16, v9
6433; GFX8-NEXT:    v_and_b32_e32 v17, s6, v8
6434; GFX8-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
6435; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
6436; GFX8-NEXT:    v_or_b32_e32 v6, v6, v4
6437; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, 64, v17
6438; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
6439; GFX8-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
6440; GFX8-NEXT:    v_subrev_u32_e32 v18, vcc, 64, v17
6441; GFX8-NEXT:    v_or_b32_e32 v10, v4, v10
6442; GFX8-NEXT:    v_or_b32_e32 v11, v5, v11
6443; GFX8-NEXT:    v_lshlrev_b64 v[4:5], v17, v[8:9]
6444; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v18, v[8:9]
6445; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
6446; GFX8-NEXT:    v_and_b32_e32 v16, s6, v20
6447; GFX8-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
6448; GFX8-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
6449; GFX8-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
6450; GFX8-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
6451; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
6452; GFX8-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
6453; GFX8-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
6454; GFX8-NEXT:    v_sub_u32_e32 v6, vcc, 64, v16
6455; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v16, v[12:13]
6456; GFX8-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
6457; GFX8-NEXT:    v_subrev_u32_e32 v10, vcc, 64, v16
6458; GFX8-NEXT:    v_or_b32_e32 v11, v4, v6
6459; GFX8-NEXT:    v_or_b32_e32 v17, v5, v7
6460; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v10, v[14:15]
6461; GFX8-NEXT:    v_lshrrev_b64 v[4:5], v16, v[14:15]
6462; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
6463; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
6464; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
6465; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
6466; GFX8-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
6467; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
6468; GFX8-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
6469; GFX8-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
6470; GFX8-NEXT:    v_or_b32_e32 v4, v18, v6
6471; GFX8-NEXT:    v_or_b32_e32 v5, v19, v7
6472; GFX8-NEXT:    v_or_b32_e32 v6, v8, v10
6473; GFX8-NEXT:    v_or_b32_e32 v7, v9, v11
6474; GFX8-NEXT:    s_setpc_b64 s[30:31]
6475;
6476; GFX9-LABEL: v_fshr_v2i128:
6477; GFX9:       ; %bb.0:
6478; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6479; GFX9-NEXT:    s_movk_i32 s6, 0x7f
6480; GFX9-NEXT:    v_xor_b32_e32 v17, -1, v16
6481; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6482; GFX9-NEXT:    v_and_b32_e32 v23, s6, v17
6483; GFX9-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6484; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6485; GFX9-NEXT:    v_or_b32_e32 v2, v2, v17
6486; GFX9-NEXT:    v_sub_u32_e32 v17, 64, v23
6487; GFX9-NEXT:    v_lshrrev_b64 v[17:18], v17, v[0:1]
6488; GFX9-NEXT:    v_lshlrev_b64 v[21:22], v23, v[2:3]
6489; GFX9-NEXT:    v_and_b32_e32 v24, s6, v16
6490; GFX9-NEXT:    v_sub_u32_e32 v16, 64, v24
6491; GFX9-NEXT:    v_or_b32_e32 v21, v17, v21
6492; GFX9-NEXT:    v_or_b32_e32 v22, v18, v22
6493; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[10:11]
6494; GFX9-NEXT:    v_lshrrev_b64 v[18:19], v24, v[8:9]
6495; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v23
6496; GFX9-NEXT:    v_or_b32_e32 v18, v18, v16
6497; GFX9-NEXT:    v_subrev_u32_e32 v16, 64, v23
6498; GFX9-NEXT:    v_or_b32_e32 v19, v19, v17
6499; GFX9-NEXT:    v_lshlrev_b64 v[16:17], v16, v[0:1]
6500; GFX9-NEXT:    v_lshlrev_b64 v[0:1], v23, v[0:1]
6501; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v23
6502; GFX9-NEXT:    v_cndmask_b32_e32 v25, 0, v0, vcc
6503; GFX9-NEXT:    v_cndmask_b32_e32 v0, v16, v21, vcc
6504; GFX9-NEXT:    v_cndmask_b32_e32 v16, v17, v22, vcc
6505; GFX9-NEXT:    v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6506; GFX9-NEXT:    v_subrev_u32_e32 v0, 64, v24
6507; GFX9-NEXT:    v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6508; GFX9-NEXT:    v_lshrrev_b64 v[2:3], v0, v[10:11]
6509; GFX9-NEXT:    v_cmp_gt_u32_e64 s[4:5], 64, v24
6510; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6511; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v1, vcc
6512; GFX9-NEXT:    v_lshrrev_b64 v[0:1], v24, v[10:11]
6513; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v24
6514; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6515; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
6516; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6517; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
6518; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6519; GFX9-NEXT:    v_or_b32_e32 v0, v25, v2
6520; GFX9-NEXT:    v_or_b32_e32 v2, v17, v8
6521; GFX9-NEXT:    v_xor_b32_e32 v8, -1, v20
6522; GFX9-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
6523; GFX9-NEXT:    v_or_b32_e32 v1, v18, v3
6524; GFX9-NEXT:    v_or_b32_e32 v3, v16, v9
6525; GFX9-NEXT:    v_and_b32_e32 v17, s6, v8
6526; GFX9-NEXT:    v_lshlrev_b64 v[8:9], 1, v[4:5]
6527; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 31, v5
6528; GFX9-NEXT:    v_or_b32_e32 v6, v6, v4
6529; GFX9-NEXT:    v_sub_u32_e32 v4, 64, v17
6530; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v4, v[8:9]
6531; GFX9-NEXT:    v_lshlrev_b64 v[10:11], v17, v[6:7]
6532; GFX9-NEXT:    v_subrev_u32_e32 v18, 64, v17
6533; GFX9-NEXT:    v_or_b32_e32 v10, v4, v10
6534; GFX9-NEXT:    v_or_b32_e32 v11, v5, v11
6535; GFX9-NEXT:    v_lshlrev_b64 v[4:5], v17, v[8:9]
6536; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v18, v[8:9]
6537; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v17
6538; GFX9-NEXT:    v_and_b32_e32 v16, s6, v20
6539; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v4, vcc
6540; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v5, vcc
6541; GFX9-NEXT:    v_cndmask_b32_e32 v4, v8, v10, vcc
6542; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v11, vcc
6543; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v17
6544; GFX9-NEXT:    v_cndmask_b32_e32 v8, v4, v6, vcc
6545; GFX9-NEXT:    v_sub_u32_e32 v6, 64, v16
6546; GFX9-NEXT:    v_cndmask_b32_e32 v9, v5, v7, vcc
6547; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v16, v[12:13]
6548; GFX9-NEXT:    v_lshlrev_b64 v[6:7], v6, v[14:15]
6549; GFX9-NEXT:    v_subrev_u32_e32 v10, 64, v16
6550; GFX9-NEXT:    v_or_b32_e32 v11, v4, v6
6551; GFX9-NEXT:    v_or_b32_e32 v17, v5, v7
6552; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v10, v[14:15]
6553; GFX9-NEXT:    v_lshrrev_b64 v[4:5], v16, v[14:15]
6554; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v16
6555; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v11, vcc
6556; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v17, vcc
6557; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v16
6558; GFX9-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[4:5]
6559; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s[4:5]
6560; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v4, vcc
6561; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v5, vcc
6562; GFX9-NEXT:    v_or_b32_e32 v4, v18, v6
6563; GFX9-NEXT:    v_or_b32_e32 v5, v19, v7
6564; GFX9-NEXT:    v_or_b32_e32 v6, v8, v10
6565; GFX9-NEXT:    v_or_b32_e32 v7, v9, v11
6566; GFX9-NEXT:    s_setpc_b64 s[30:31]
6567;
6568; GFX10-LABEL: v_fshr_v2i128:
6569; GFX10:       ; %bb.0:
6570; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6571; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6572; GFX10-NEXT:    v_xor_b32_e32 v17, -1, v16
6573; GFX10-NEXT:    s_movk_i32 s5, 0x7f
6574; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
6575; GFX10-NEXT:    v_and_b32_e32 v26, s5, v16
6576; GFX10-NEXT:    v_lshlrev_b64 v[6:7], 1, v[6:7]
6577; GFX10-NEXT:    v_and_b32_e32 v25, s5, v17
6578; GFX10-NEXT:    v_lshrrev_b32_e32 v17, 31, v1
6579; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
6580; GFX10-NEXT:    v_subrev_nc_u32_e32 v27, 64, v26
6581; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v26
6582; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v25
6583; GFX10-NEXT:    v_or_b32_e32 v2, v2, v17
6584; GFX10-NEXT:    v_subrev_nc_u32_e32 v19, 64, v25
6585; GFX10-NEXT:    v_lshlrev_b64 v[23:24], v25, v[0:1]
6586; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
6587; GFX10-NEXT:    v_lshrrev_b64 v[17:18], v18, v[0:1]
6588; GFX10-NEXT:    v_lshlrev_b64 v[21:22], v25, v[2:3]
6589; GFX10-NEXT:    v_lshlrev_b64 v[0:1], v19, v[0:1]
6590; GFX10-NEXT:    v_cndmask_b32_e32 v23, 0, v23, vcc_lo
6591; GFX10-NEXT:    v_cndmask_b32_e32 v24, 0, v24, vcc_lo
6592; GFX10-NEXT:    v_or_b32_e32 v22, v18, v22
6593; GFX10-NEXT:    v_sub_nc_u32_e32 v18, 64, v26
6594; GFX10-NEXT:    v_or_b32_e32 v21, v17, v21
6595; GFX10-NEXT:    v_lshrrev_b64 v[16:17], v26, v[8:9]
6596; GFX10-NEXT:    v_cndmask_b32_e32 v22, v1, v22, vcc_lo
6597; GFX10-NEXT:    v_lshlrev_b64 v[18:19], v18, v[10:11]
6598; GFX10-NEXT:    v_cndmask_b32_e32 v21, v0, v21, vcc_lo
6599; GFX10-NEXT:    v_lshrrev_b64 v[0:1], v27, v[10:11]
6600; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
6601; GFX10-NEXT:    v_or_b32_e32 v16, v16, v18
6602; GFX10-NEXT:    v_or_b32_e32 v17, v17, v19
6603; GFX10-NEXT:    v_cndmask_b32_e32 v18, v21, v2, vcc_lo
6604; GFX10-NEXT:    v_cndmask_b32_e32 v22, v22, v3, vcc_lo
6605; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
6606; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v16, s4
6607; GFX10-NEXT:    v_xor_b32_e32 v16, -1, v20
6608; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v17, s4
6609; GFX10-NEXT:    v_lshrrev_b64 v[2:3], v26, v[10:11]
6610; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
6611; GFX10-NEXT:    v_and_b32_e32 v25, s5, v16
6612; GFX10-NEXT:    v_lshrrev_b32_e32 v8, 31, v5
6613; GFX10-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
6614; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v9, vcc_lo
6615; GFX10-NEXT:    v_or_b32_e32 v0, v23, v0
6616; GFX10-NEXT:    v_sub_nc_u32_e32 v9, 64, v25
6617; GFX10-NEXT:    v_or_b32_e32 v6, v6, v8
6618; GFX10-NEXT:    v_and_b32_e32 v23, s5, v20
6619; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s4
6620; GFX10-NEXT:    v_cndmask_b32_e64 v26, 0, v3, s4
6621; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v9, v[4:5]
6622; GFX10-NEXT:    v_lshlrev_b64 v[10:11], v25, v[6:7]
6623; GFX10-NEXT:    v_sub_nc_u32_e32 v20, 64, v23
6624; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, 64, v25
6625; GFX10-NEXT:    v_or_b32_e32 v2, v18, v2
6626; GFX10-NEXT:    v_lshlrev_b64 v[16:17], v25, v[4:5]
6627; GFX10-NEXT:    v_lshrrev_b64 v[18:19], v23, v[12:13]
6628; GFX10-NEXT:    v_or_b32_e32 v10, v8, v10
6629; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, 64, v23
6630; GFX10-NEXT:    v_lshlrev_b64 v[20:21], v20, v[14:15]
6631; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v25
6632; GFX10-NEXT:    v_lshlrev_b64 v[3:4], v3, v[4:5]
6633; GFX10-NEXT:    v_or_b32_e32 v5, v9, v11
6634; GFX10-NEXT:    v_lshrrev_b64 v[8:9], v8, v[14:15]
6635; GFX10-NEXT:    v_cmp_gt_u32_e64 s4, 64, v23
6636; GFX10-NEXT:    v_cndmask_b32_e32 v11, 0, v16, vcc_lo
6637; GFX10-NEXT:    v_or_b32_e32 v16, v18, v20
6638; GFX10-NEXT:    v_or_b32_e32 v18, v19, v21
6639; GFX10-NEXT:    v_cndmask_b32_e32 v10, v3, v10, vcc_lo
6640; GFX10-NEXT:    v_cndmask_b32_e32 v5, v4, v5, vcc_lo
6641; GFX10-NEXT:    v_lshrrev_b64 v[3:4], v23, v[14:15]
6642; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v16, s4
6643; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, 0, v23
6644; GFX10-NEXT:    v_cmp_eq_u32_e64 s6, 0, v25
6645; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v18, s4
6646; GFX10-NEXT:    v_cndmask_b32_e32 v14, 0, v17, vcc_lo
6647; GFX10-NEXT:    v_or_b32_e32 v1, v24, v1
6648; GFX10-NEXT:    v_cndmask_b32_e64 v6, v10, v6, s6
6649; GFX10-NEXT:    v_cndmask_b32_e64 v7, v5, v7, s6
6650; GFX10-NEXT:    v_cndmask_b32_e64 v5, v8, v12, s5
6651; GFX10-NEXT:    v_cndmask_b32_e64 v8, v9, v13, s5
6652; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, v3, s4
6653; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, v4, s4
6654; GFX10-NEXT:    v_or_b32_e32 v3, v22, v26
6655; GFX10-NEXT:    v_or_b32_e32 v4, v11, v5
6656; GFX10-NEXT:    v_or_b32_e32 v5, v14, v8
6657; GFX10-NEXT:    v_or_b32_e32 v6, v6, v9
6658; GFX10-NEXT:    v_or_b32_e32 v7, v7, v10
6659; GFX10-NEXT:    s_setpc_b64 s[30:31]
6660  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
6661  ret <2 x i128> %result
6662}
6663
; Declarations of the llvm.fshr (funnel shift right) intrinsic overloads,
; grouped by element bit width. Each overload takes three operands of one
; type and returns that same type. Every declaration refers to attribute
; group #0, which is defined after the last declaration.

; 7-bit and 8-bit element types.
6664declare i7 @llvm.fshr.i7(i7, i7, i7) #0
6665declare i8 @llvm.fshr.i8(i8, i8, i8) #0
6666declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
6667declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
6668
; 16-bit element types.
6669declare i16 @llvm.fshr.i16(i16, i16, i16) #0
6670declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
6671declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
6672declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
6673declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
6674declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
6675declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
6676
; 24-bit element types.
6677declare i24 @llvm.fshr.i24(i24, i24, i24) #0
6678declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
6679
; 32-bit element types.
6680declare i32 @llvm.fshr.i32(i32, i32, i32) #0
6681declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
6682declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
6683declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
6684declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
6685declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
6686
; 48-bit scalar.
6687declare i48 @llvm.fshr.i48(i48, i48, i48) #0
6688
; 64-bit element types.
6689declare i64 @llvm.fshr.i64(i64, i64, i64) #0
6690declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
6691
; 128-bit element types.
6692declare i128 @llvm.fshr.i128(i128, i128, i128) #0
6693declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
6694
; Attribute group referenced by every intrinsic declaration above.
6695attributes #0 = { nounwind readnone speculatable willreturn }
6696