; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; The test runs InstSimplify twice, once per byte order, because the bitcast
; tests further down depend on which vector element lands in the low byte.
; RUN: opt < %s -passes=instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BIGENDIAN
; RUN: opt < %s -passes=instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LITTLEENDIAN

; If any bits of the shift amount are known to make it exceed or equal
; the number of bits in the type, the result of the shift is poison.

; OR-ing in 32 sets bit 5, so the amount is always >= 32, the width of i32.
define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_amount_is_known_bogus(
; CHECK-NEXT: ret i32 poison
;
  %or = or i32 %b, 32
  %shl = shl i32 %a, %or
  ret i32 %shl
}

; Check some weird types and the other shift ops.

; OR-ing in 31 sets the low five bits, so the amount is always >= 31, the width of i31.
define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) {
; CHECK-LABEL: @lshr_amount_is_known_bogus(
; CHECK-NEXT: ret i31 poison
;
  %or = or i31 %b, 31
  %shr = lshr i31 %a, %or
  ret i31 %shr
}

; OR-ing in 33 sets bits 5 and 0, so the amount is always >= 33, the width of i33.
define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) {
; CHECK-LABEL: @ashr_amount_is_known_bogus(
; CHECK-NEXT: ret i33 poison
;
  %or = or i33 %b, 33
  %shr = ashr i33 %a, %or
  ret i33 %shr
}


; If all valid bits of the shift amount are known 0, there's no shift.
; It doesn't matter if high bits are set because that would be an over-shift.
; Therefore, the only possible valid result of these shifts is %a.
; The mask 0xfff0 clears the low 4 bits, which are the only bits that can
; encode an in-range amount (0..15) for an i16 shift.
define i16 @ashr_amount_is_zero(i16 %a, i16 %b) {
; CHECK-LABEL: @ashr_amount_is_zero(
; CHECK-NEXT: ret i16 [[A:%.*]]
;
  %and = and i16 %b, 65520 ; 0xfff0
  %shr = ashr i16 %a, %and
  ret i16 %shr
}

; Only bit 11 survives the mask, so the amount is either 0 or 2048; the latter
; is larger than the i300 bit width.
define i300 @lshr_amount_is_zero(i300 %a, i300 %b) {
; CHECK-LABEL: @lshr_amount_is_zero(
; CHECK-NEXT: ret i300 [[A:%.*]]
;
  %and = and i300 %b, 2048
  %shr = lshr i300 %a, %and
  ret i300 %shr
}

; The mask 0x1f0 clears the low 4 bits, so the amount is either 0 or at
; least 16, which is larger than the i9 bit width.
define i9 @shl_amount_is_zero(i9 %a, i9 %b) {
; CHECK-LABEL: @shl_amount_is_zero(
; CHECK-NEXT: ret i9 [[A:%.*]]
;
  %and = and i9 %b, 496 ; 0x1f0
  %shl = shl i9 %a, %and
  ret i9 %shl
}


; Verify that we've calculated the log2 boundary of valid bits correctly for a weird type.

; The mask 0x1f8 keeps bit 3, so an amount of 8 (valid for i9) is still
; possible and the shift must stay. The expected output prints the same mask
; as the signed i9 value -8.
define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) {
; CHECK-LABEL: @shl_amount_is_not_known_zero(
; CHECK-NEXT: [[AND:%.*]] = and i9 [[B:%.*]], -8
; CHECK-NEXT: [[SHL:%.*]] = shl i9 [[A:%.*]], [[AND]]
; CHECK-NEXT: ret i9 [[SHL]]
;
  %and = and i9 %b, 504 ; 0x1f8
  %shl = shl i9 %a, %and
  ret i9 %shl
}


; For vectors, we need all scalar elements to meet the requirements to optimize.

; Both lanes have bit 5 forced on, so every lane shifts by at least 32.
define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @ashr_vector_bogus(
; CHECK-NEXT: ret <2 x i32> poison
;
  %or = or <2 x i32> %b, <i32 32, i32 32>
  %shr = ashr <2 x i32> %a, %or
  ret <2 x i32> %shr
}

; FIXME: This should fold to poison (every lane over-shifts), but
; computeKnownBits doesn't handle the union of the per-lane constants.
; Lane 0 has bit 5 forced on (>= 32) and lane 1 has bit 6 forced on (>= 64);
; the expected output currently keeps both instructions.
define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_vector_bogus(
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[B:%.*]], <i32 32, i32 64>
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[OR]]
; CHECK-NEXT: ret <2 x i32> [[SHL]]
;
  %or = or <2 x i32> %b, <i32 32, i32 64>
  %shl = shl <2 x i32> %a, %or
  ret <2 x i32> %shl
}

; The masks 64 and 256 both clear the low 5 bits, so each lane's amount is
; either 0 or larger than the i32 bit width.
define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @lshr_vector_zero(
; CHECK-NEXT: ret <2 x i32> [[A:%.*]]
;
  %and = and <2 x i32> %b, <i32 64, i32 256>
  %shr = lshr <2 x i32> %a, %and
  ret <2 x i32> %shr
}

; Make sure that weird vector types work too.
; The mask 1024 leaves only bit 10, so each lane's amount is 0 or 1024.
define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
; CHECK-LABEL: @shl_vector_zero(
; CHECK-NEXT: ret <2 x i15> [[A:%.*]]
;
  %and = and <2 x i15> %b, <i15 1024, i15 1024>
  %shl = shl <2 x i15> %a, %and
  ret <2 x i15> %shl
}

; Negative test: the mask 3 allows the in-range amounts 0..3, so nothing folds.
define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @shl_vector_for_real(
; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[B:%.*]], <i32 3, i32 3>
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]]
; CHECK-NEXT: ret <2 x i32> [[SHL]]
;
  %and = and <2 x i32> %b, <i32 3, i32 3> ; a necessary mask op
  %shl = shl <2 x i32> %a, %and
  ret <2 x i32> %shl
}


; We calculate the valid bits of the shift using log2, and log2 of 1 (the type width) is 0.
; That should be ok. Either the shift amount is 0 or invalid (1), so we can always return %a.

define i1 @shl_i1(i1 %a, i1 %b) {
; CHECK-LABEL: @shl_i1(
; CHECK-NEXT: ret i1 [[A:%.*]]
;
  %shl = shl i1 %a, %b
  ret i1 %shl
}

; The following cases only get folded by InstCombine,
; see InstCombine/lshr.ll.
; Declarations of the bit-counting intrinsics called by the tests below.
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone

; In each of the six tests below the expected output is the input unchanged:
; InstSimplify leaves the intrinsic call and the lshr in place.

; Scalar ctlz with the i1 flag set to true, shifted right by 5.
define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
; CHECK-NEXT: [[CT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[CT]], 5
; CHECK-NEXT: ret i32 [[SH]]
;
  %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  %sh = lshr i32 %ct, 5
  ret i32 %sh
}

; Scalar cttz variant of the test above.
define i32 @lshr_cttz_zero_is_undef(i32 %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef(
; CHECK-NEXT: [[CT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[CT]], 5
; CHECK-NEXT: ret i32 [[SH]]
;
  %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  %sh = lshr i32 %ct, 5
  ret i32 %sh
}

; Vector ctlz with a splat shift amount of 3 in both i8 lanes.
define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
; CHECK-NEXT: ret <2 x i8> [[SH]]
;
  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  ret <2 x i8> %sh
}

; Vector ctlz with a non-splat shift amount <3, 0>; only lane 0 (shifted by 3)
; is extracted and returned.
define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
; CHECK-NEXT: [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
; CHECK-NEXT: ret i8 [[EX]]
;
  %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
  %ex = extractelement <2 x i8> %sh, i32 0
  ret i8 %ex
}

; Vector cttz with a splat shift amount of 3 in both i8 lanes.
define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
; CHECK-NEXT: ret <2 x i8> [[SH]]
;
  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  ret <2 x i8> %sh
}

; Vector cttz with a non-splat shift amount <3, 0>; only lane 0 is returned.
define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
; CHECK-NEXT: [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
; CHECK-NEXT: [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
; CHECK-NEXT: [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
; CHECK-NEXT: ret i8 [[EX]]
;
  %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
  %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
  %ex = extractelement <2 x i8> %sh, i32 0
  ret i8 %ex
}

; The shift amount is 0 in both the high and the low byte, so the low byte is
; known zero for either endianness. The middle byte doesn't matter.
; (Shuffle index 3 selects element 0 of %c, the inserted constant 0.)

define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) {
; CHECK-LABEL: @bitcast_noshift_scalar(
; CHECK-NEXT: ret i24 [[V2:%.*]]
;
  %c = insertelement <3 x i8> poison, i8 0, i64 0
  %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 3>
  %b = bitcast <3 x i8> %s to i24
  %r = shl i24 %v2, %b
  ret i24 %r
}

; The shift amount is 0 on low byte of big-endian and unknown on little-endian.
; The shuffle mask <0, 1, 3> places the constant 0 only in vector element 2;
; elements 0 and 1 come from %v1 and are unknown.
define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) {
; BIGENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
; BIGENDIAN-NEXT: ret i24 [[V2:%.*]]
;
; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
; LITTLEENDIAN-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
; LITTLEENDIAN-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
; LITTLEENDIAN-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
; LITTLEENDIAN-NEXT: ret i24 [[R]]
;
  %c = insertelement <3 x i8> poison, i8 0, i64 0
  %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 0, i32 1, i32 3>
  %b = bitcast <3 x i8> %s to i24
  %r = shl i24 %v2, %b
  ret i24 %r
}

; The shift amount is 0 on low byte of little-endian and unknown on big-endian.
; (The shuffle mask <3, 1, 2> places the constant 0 only in vector element 0.)

define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) {
; BIGENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
; BIGENDIAN-NEXT: [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
; BIGENDIAN-NEXT: [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
; BIGENDIAN-NEXT: [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
; BIGENDIAN-NEXT: ret i24 [[R]]
;
; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
; LITTLEENDIAN-NEXT: ret i24 [[V2:%.*]]
;
  %c = insertelement <3 x i8> poison, i8 0, i64 0
  %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 2>
  %b = bitcast <3 x i8> %s to i24
  %r = shl i24 %v2, %b
  ret i24 %r
}

; The shift amount is known 24 on little-endian and known 24<<16 on big-endian
; across all vector elements, so it's an overshift either way.
; Shuffle index 9 selects element 0 of %c (the constant 24); the mask places
; it in byte positions 0, 3 and 6, i.e. the first byte of each i24 lane.
define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
; CHECK-LABEL: @bitcast_overshift_vector(
; CHECK-NEXT: ret <3 x i24> poison
;
  %c = insertelement <9 x i8> poison, i8 24, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; The shift amount is known 23 on little-endian and known 23<<16 on big-endian
; across all vector elements, so it's an overshift for big-endian.
; (Same byte placement as above, but 23 in the low byte is still an in-range
; amount for i24, so little-endian must keep the shift.)

define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) {
; BIGENDIAN-LABEL: @bitcast_overshift_vector_bigend(
; BIGENDIAN-NEXT: ret <3 x i24> poison
;
; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_bigend(
; LITTLEENDIAN-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
; LITTLEENDIAN-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
; LITTLEENDIAN-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
; LITTLEENDIAN-NEXT: ret <3 x i24> [[R]]
;
  %c = insertelement <9 x i8> poison, i8 23, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; The shift amount is known 23 on big-endian and known 23<<16 on little-endian
; across all vector elements, so it's an overshift for little-endian.
; Shuffle index 9 selects element 0 of %c (the constant 23); the mask places
; it in byte positions 2, 5 and 8, i.e. the last byte of each i24 lane.
define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2) {
; BIGENDIAN-LABEL: @bitcast_overshift_vector_littleend(
; BIGENDIAN-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
; BIGENDIAN-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
; BIGENDIAN-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
; BIGENDIAN-NEXT: ret <3 x i24> [[R]]
;
; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_littleend(
; LITTLEENDIAN-NEXT: ret <3 x i24> poison
;
  %c = insertelement <9 x i8> poison, i8 23, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; Negative test - the shift amount is known 24 or 24<<16 on only 2 out of 3 elements.
; The constant 24 (shuffle index 9) is placed only in byte positions 0 and 3;
; position 6 takes %v1 element 6, so the third i24 lane has no known byte and
; the expected output keeps every instruction.
define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
; CHECK-LABEL: @bitcast_partial_overshift_vector(
; CHECK-NEXT: [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT: [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
; CHECK-NEXT: [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
; CHECK-NEXT: ret <3 x i24> [[R]]
;
  %c = insertelement <9 x i8> poison, i8 24, i64 0
  %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
  %b = bitcast <9 x i8> %s to <3 x i24>
  %r = shl <3 x i24> %v2, %b
  ret <3 x i24> %r
}

; Negative test - don't know how to look through a cast with non-integer type (but we could handle this...).
; (Here the bitcast source is <2 x float>, with the constant 0.0 placed in element 0.)

define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %v2) {
; CHECK-LABEL: @bitcast_noshift_vector_wrong_type(
; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 1>
; CHECK-NEXT: [[B:%.*]] = bitcast <2 x float> [[S]] to <1 x i64>
; CHECK-NEXT: [[R:%.*]] = shl <1 x i64> [[V2:%.*]], [[B]]
; CHECK-NEXT: ret <1 x i64> [[R]]
;
  %c = insertelement <2 x float> poison, float 0.0, i64 0
  %s = shufflevector <2 x float> %v1, <2 x float> %c, <2 x i32> <i32 2, i32 1>
  %b = bitcast <2 x float> %s to <1 x i64>
  %r = shl <1 x i64> %v2, %b
  ret <1 x i64> %r
}