1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s
3
4declare i8 @llvm.fshl.i8(i8, i8, i8)
5declare i16 @llvm.fshl.i16(i16, i16, i16)
6declare i32 @llvm.fshl.i32(i32, i32, i32)
7declare i64 @llvm.fshl.i64(i64, i64, i64)
8declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
9
10declare i8 @llvm.fshr.i8(i8, i8, i8)
11declare i16 @llvm.fshr.i16(i16, i16, i16)
12declare i32 @llvm.fshr.i32(i32, i32, i32)
13declare i64 @llvm.fshr.i64(i64, i64, i64)
14declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
15
16; General case - all operands can be variables.
17
; fshl(x, y, z) = (x << (z mod 32)) | (y >> (32 - (z mod 32))).
; andi. masks the shift amount to 5 bits and sets cr0; isel then returns %x
; untouched when the masked amount is 0 (funnel shift by 0 is the identity),
; avoiding the undefined srw-by-32 in the other operand of the or.
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi. 5, 5, 31
; CHECK-NEXT:    subfic 6, 5, 32
; CHECK-NEXT:    slw 5, 3, 5
; CHECK-NEXT:    srw 4, 4, 6
; CHECK-NEXT:    or 4, 5, 4
; CHECK-NEXT:    isel 3, 3, 4, 2
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
31
32; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
; i37 is promoted to i64, so the shift amount must be reduced modulo 37 rather
; than by a simple mask: the lis/ori/sldi/oris/ori constant materialization plus
; mulhdu/rldicl/mulli/subf. is magic-number unsigned division computing
; %z urem 37 (multiply-high by the reciprocal, multiply back by 37, subtract).
; After that it is the same shift/or/isel pattern as the i32 case.
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis 6, -8857
; CHECK-NEXT:    clrldi 5, 5, 27
; CHECK-NEXT:    ori 6, 6, 51366
; CHECK-NEXT:    clrldi 4, 4, 27
; CHECK-NEXT:    sldi 6, 6, 32
; CHECK-NEXT:    oris 6, 6, 3542
; CHECK-NEXT:    ori 6, 6, 31883
; CHECK-NEXT:    mulhdu 6, 5, 6
; CHECK-NEXT:    rldicl 6, 6, 59, 5
; CHECK-NEXT:    mulli 6, 6, 37
; CHECK-NEXT:    subf. 5, 6, 5
; CHECK-NEXT:    subfic 6, 5, 37
; CHECK-NEXT:    sld 5, 3, 5
; CHECK-NEXT:    srd 4, 4, 6
; CHECK-NEXT:    or 4, 5, 4
; CHECK-NEXT:    isel 3, 3, 4, 2
; CHECK-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
57
; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
; All-constant operands: fshl(112, 127, 2) constant-folds to 67 (0b1000011),
; so codegen is a single load-immediate.
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
69
; With constant shift amount, this is rotate + insert (missing extended mnemonics).

; fshl by 9: the top 23 result bits come from %x << 9 (rlwimi insert, mask
; bits 0-22) and the low 9 bits from %y >> 23 (the rlwinm rotate by 9).
define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 9, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}
82
; Check modulo math on shift amount.

; 41 mod 32 = 9, so this must emit exactly the same code as
; fshl_i32_const_shift above.
define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 9, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}
95
; 64-bit should also work.

; 105 mod 64 = 41: rotate %y by 41, then rldimi inserts the top 41 bits
; from %x << 41 (mask starting at bit 0).
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotldi 4, 4, 41
; CHECK-NEXT:    rldimi 4, 3, 41, 0
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}
108
; This should work without any node-specific logic.

; fshl(255, 0, 7) = ((255 << 7) | (0 >> 1)) & 255 = 128, folded to an
; immediate by generic constant folding.
define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
119
120; Repeat everything for funnel shift right.
121
; General case - all operands can be variables.

; fshr(x, y, z) = (x << (32 - (z mod 32))) | (y >> (z mod 32)).
; Mirror of fshl_i32: andi. masks and sets cr0, and isel returns %y untouched
; when the masked amount is 0 (funnel shift right by 0 returns y).
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi. 5, 5, 31
; CHECK-NEXT:    subfic 6, 5, 32
; CHECK-NEXT:    srw 5, 4, 5
; CHECK-NEXT:    slw 3, 3, 6
; CHECK-NEXT:    or 3, 3, 5
; CHECK-NEXT:    isel 3, 4, 3, 2
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
137
; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
; Same magic-number urem-37 reduction as fshl_i37 (mulhdu by the reciprocal,
; mulli by 37, subtract to get %z urem 37), followed by the fshr
; shift/or/isel pattern. Only the instruction scheduling of the clrldi
; differs from the fshl version.
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis 6, -8857
; CHECK-NEXT:    clrldi 5, 5, 27
; CHECK-NEXT:    ori 6, 6, 51366
; CHECK-NEXT:    sldi 6, 6, 32
; CHECK-NEXT:    oris 6, 6, 3542
; CHECK-NEXT:    ori 6, 6, 31883
; CHECK-NEXT:    mulhdu 6, 5, 6
; CHECK-NEXT:    rldicl 6, 6, 59, 5
; CHECK-NEXT:    mulli 6, 6, 37
; CHECK-NEXT:    subf. 5, 6, 5
; CHECK-NEXT:    clrldi 6, 4, 27
; CHECK-NEXT:    subfic 7, 5, 37
; CHECK-NEXT:    srd 5, 6, 5
; CHECK-NEXT:    sld 3, 3, 7
; CHECK-NEXT:    or 3, 3, 5
; CHECK-NEXT:    isel 3, 4, 3, 2
; CHECK-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}
163
; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
; All-constant operands: fshr(112, 127, 2) constant-folds to 31 (0b0011111),
; so codegen is a single load-immediate.
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}
175
; With constant shift amount, this is rotate + insert (missing extended mnemonics).

; fshr by 9 is a rotate by 32 - 9 = 23: the low 23 result bits come from
; %y >> 9 (the rlwinm rotate by 23) and the top 9 bits from %x << 23
; (rlwimi insert, mask bits 0-8).
define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 23, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}
188
; Check modulo math on shift amount. 41-32=9.

; 41 mod 32 = 9, so this must emit exactly the same code as
; fshr_i32_const_shift above.
define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 23, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}
201
; 64-bit should also work. 105-64 = 41.

; Effective fshr amount is 41, giving a rotate by 64 - 41 = 23: rotate %y
; by 23, then rldimi inserts the top 23 bits from %x << 23.
define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotldi 4, 4, 23
; CHECK-NEXT:    rldimi 4, 3, 23, 0
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}
214
; This should work without any node-specific logic.

; fshr(255, 0, 7) = ((255 << 1) | (0 >> 7)) & 255 = 254, folded to an
; immediate by generic constant folding.
define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}
225
; A shift amount equal to the bit width is 0 mod 32, and fshl by 0 returns
; %x — already in r3, so no instructions at all are needed.
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
233
; A shift amount equal to the bit width is 0 mod 32, and fshr by 0 returns
; %y — just a register copy from r4 into the return register.
define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}
242
; Vector splat of the bit width: every lane shifts by 0 mod 32, so the fshl
; result is %x, already in v2 — no instructions needed.
define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
250
; Vector splat of the bit width: every lane shifts by 0 mod 32, so the fshr
; result is %y — a single vector register copy into the return register.
define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmr 2, 3
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
259
260