; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi. 5, 5, 31
; CHECK-NEXT:    subfic 6, 5, 32
; CHECK-NEXT:    slw 5, 3, 5
; CHECK-NEXT:    srw 4, 4, 6
; CHECK-NEXT:    or 4, 5, 4
; CHECK-NEXT:    isel 3, 3, 4, 2
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis 6, -8857
; CHECK-NEXT:    clrldi 5, 5, 27
; CHECK-NEXT:    ori 6, 6, 51366
; CHECK-NEXT:    clrldi 4, 4, 27
; CHECK-NEXT:    sldi 6, 6, 32
; CHECK-NEXT:    oris 6, 6, 3542
; CHECK-NEXT:    ori 6, 6, 31883
; CHECK-NEXT:    mulhdu 6, 5, 6
; CHECK-NEXT:    rldicl 6, 6, 59, 5
; CHECK-NEXT:    mulli 6, 6, 37
; CHECK-NEXT:    subf. 5, 6, 5
; CHECK-NEXT:    subfic 6, 5, 37
; CHECK-NEXT:    sld 5, 3, 5
; CHECK-NEXT:    srd 4, 4, 6
; CHECK-NEXT:    or 4, 5, 4
; CHECK-NEXT:    isel 3, 3, 4, 2
; CHECK-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 9, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 9, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotldi 4, 4, 41
; CHECK-NEXT:    rldimi 4, 3, 41, 0
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi. 5, 5, 31
; CHECK-NEXT:    subfic 6, 5, 32
; CHECK-NEXT:    srw 5, 4, 5
; CHECK-NEXT:    slw 3, 3, 6
; CHECK-NEXT:    or 3, 3, 5
; CHECK-NEXT:    isel 3, 4, 3, 2
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis 6, -8857
; CHECK-NEXT:    clrldi 5, 5, 27
; CHECK-NEXT:    ori 6, 6, 51366
; CHECK-NEXT:    sldi 6, 6, 32
; CHECK-NEXT:    oris 6, 6, 3542
; CHECK-NEXT:    ori 6, 6, 31883
; CHECK-NEXT:    mulhdu 6, 5, 6
; CHECK-NEXT:    rldicl 6, 6, 59, 5
; CHECK-NEXT:    mulli 6, 6, 37
; CHECK-NEXT:    subf. 5, 6, 5
; CHECK-NEXT:    clrldi 6, 4, 27
; CHECK-NEXT:    subfic 7, 5, 37
; CHECK-NEXT:    srd 5, 6, 5
; CHECK-NEXT:    sld 3, 3, 7
; CHECK-NEXT:    or 3, 3, 5
; CHECK-NEXT:    isel 3, 4, 3, 2
; CHECK-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 23, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rlwinm 4, 4, 23, 0, 31
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotldi 4, 4, 23
; CHECK-NEXT:    rldimi 4, 3, 23, 0
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmr 2, 3
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}