; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; AArch64 codegen tests for the funnel-shift intrinsics.
;   fshl(x, y, z): concatenate x (high half) with y (low half), shift the
;                  double-width value left by z modulo the bit width, and keep
;                  the high half.
;   fshr(x, y, z): same concatenation, shifted right, keeping the low half.

; Funnel shift left, scalar widths plus one vector type.
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; Funnel shift right, same set of types.
declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.
; The expected sequence ORs (x << z) with (y >> -(z & 31)); the final tst/csel
; returns %x unchanged when the low five bits of %z are zero.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w9, w2, #0x1f
; CHECK-NEXT: neg w9, w9
; CHECK-NEXT: lsl w8, w0, w2
; CHECK-NEXT: lsr w9, w1, w9
; CHECK-NEXT: orr w8, w8, w9
; CHECK-NEXT: tst w2, #0x1f
; CHECK-NEXT: csel w0, w0, w8, eq
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
; i37 is not a power-of-two width, so the shift amount cannot be reduced with
; a simple mask. The expected code computes z mod 37 with a multiply-high by a
; magic constant followed by msub, and masks %y to 37 bits before shifting it
; right. The final cmp/csel returns %x unchanged when the reduced amount is 0.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshl_i37:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x10, #31883
; CHECK-NEXT: movk x10, #3542, lsl #16
; CHECK-NEXT: movk x10, #51366, lsl #32
; CHECK-NEXT: and x9, x2, #0x1fffffffff
; CHECK-NEXT: movk x10, #56679, lsl #48
; CHECK-NEXT: umulh x10, x9, x10
; CHECK-NEXT: mov w11, #37
; CHECK-NEXT: lsr x10, x10, #5
; CHECK-NEXT: msub x9, x10, x11, x9
; CHECK-NEXT: and x8, x1, #0x1fffffffff
; CHECK-NEXT: sub x11, x11, x9
; CHECK-NEXT: lsl x10, x0, x9
; CHECK-NEXT: lsr x8, x8, x11
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: cmp x9, #0 // =0
; CHECK-NEXT: csel x0, x0, x8, eq
; CHECK-NEXT: ret
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
; All operands are constants, so the call folds to the immediate 67.

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #67
; CHECK-NEXT: ret
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; Constant folding with a shift amount larger than the bit width.
; 15 mod 8 = 7: (255 << 7) | (0 >> 1) = 128 in i8.

define i8 @fshl_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #128
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

; 11 mod 8 = 3: (15 << 3) | (15 >> 5) = 120.

define i8 @fshl_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #120
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

; 8 mod 8 = 0: a zero shift returns the first operand, which is 0 here.

define i8 @fshl_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshl_i8_const_fold_overshift_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.
; A left funnel shift by 9 on i32 is expected as extr with immediate
; 32 - 9 = 23.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #23
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount.
; 41 mod 32 = 9, so the expected immediate is again 32 - 9 = 23.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #23
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.
; 105 mod 64 = 41, so the expected immediate is 64 - 41 = 23.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshl_i64_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x0, x1, #23
; CHECK-NEXT: ret
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; (255 << 7) | (0 >> 1) = 128 in i8.

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #128
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.
; The expected sequence ORs (y >> z) with (x << -(z & 31)); the final tst/csel
; returns %y unchanged when the low five bits of %z are zero.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w9, w2, #0x1f
; CHECK-NEXT: neg w9, w9
; CHECK-NEXT: lsr w8, w1, w2
; CHECK-NEXT: lsl w9, w0, w9
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: tst w2, #0x1f
; CHECK-NEXT: csel w0, w1, w8, eq
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
; i37 is not a power-of-two width, so the shift amount cannot be reduced with
; a simple mask. The expected code computes z mod 37 with a multiply-high by a
; magic constant followed by msub, and masks %y to 37 bits before shifting it
; right. The final cmp/csel returns %y unchanged when the reduced amount is 0.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK-LABEL: fshr_i37:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x10, #31883
; CHECK-NEXT: movk x10, #3542, lsl #16
; CHECK-NEXT: movk x10, #51366, lsl #32
; CHECK-NEXT: and x9, x2, #0x1fffffffff
; CHECK-NEXT: movk x10, #56679, lsl #48
; CHECK-NEXT: umulh x10, x9, x10
; CHECK-NEXT: mov w11, #37
; CHECK-NEXT: lsr x10, x10, #5
; CHECK-NEXT: msub x9, x10, x11, x9
; CHECK-NEXT: and x8, x1, #0x1fffffffff
; CHECK-NEXT: sub x10, x11, x9
; CHECK-NEXT: lsr x8, x8, x9
; CHECK-NEXT: lsl x10, x0, x10
; CHECK-NEXT: orr x8, x10, x8
; CHECK-NEXT: cmp x9, #0 // =0
; CHECK-NEXT: csel x0, x1, x8, eq
; CHECK-NEXT: ret
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
; All operands are constants, so the call folds to the immediate 31.

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #31
; CHECK-NEXT: ret
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; Constant folding with a shift amount larger than the bit width.
; 15 mod 8 = 7: (0 >> 7) | (255 << 1) = 254 in i8.

define i8 @fshr_i8_const_fold_overshift_1() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #254
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
  ret i8 %f
}

; 11 mod 8 = 3: (15 >> 3) | (15 << 5) = 1 | 224 = 225 in i8.

define i8 @fshr_i8_const_fold_overshift_2() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #225
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
  ret i8 %f
}

; 8 mod 8 = 0: a zero shift returns the second operand, which is 255 here.

define i8 @fshr_i8_const_fold_overshift_3() {
; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #255
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
  ret i8 %f
}

; With constant shift amount, this is 'extr'.
; A right funnel shift by 9 on i32 is expected as extr with immediate 9.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #9
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr w0, w0, w1, #9
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK-LABEL: fshr_i64_const_overshift:
; CHECK: // %bb.0:
; CHECK-NEXT: extr x0, x0, x1, #41
; CHECK-NEXT: ret
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; (0 >> 7) | (255 << 1) = 254 in i8.

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, #254
; CHECK-NEXT: ret
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Shift amount equal to the bit width: 32 mod 32 = 0, so the funnel shift is a
; no-op. fshl returns %x, which is already in the return register, so only
; 'ret' is expected.

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; fshr with a zero effective shift returns %y, so a single register move from
; the second argument is expected.

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, w1
; CHECK-NEXT: ret
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; Vector form of the shift-by-bitwidth case: every lane shifts by 32, so fshl
; returns %x untouched.

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Vector fshr with every lane shifting by 32 returns %y, expected as one
; full-register vector move.

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}