; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S -o - %s | FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Declarations of the MVE predicate-conversion intrinsics under test:
; v2i converts a predicate vector (<n x i1>) to its i32 form, and i2v
; converts an i32 back to a predicate vector, at each of the three
; MVE predicate widths (4, 8 and 16 lanes).
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

; Round-trip conversions from predicate vector to i32 back to the same
; size of vector should be eliminated.

define <4 x i1> @v2i2v_4(<4 x i1> %vin) {
; CHECK-LABEL: @v2i2v_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <4 x i1> [[VIN:%.*]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %int)
  ret <4 x i1> %vout
}

define <8 x i1> @v2i2v_8(<8 x i1> %vin) {
; CHECK-LABEL: @v2i2v_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <8 x i1> [[VIN:%.*]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %int)
  ret <8 x i1> %vout
}

define <16 x i1> @v2i2v_16(<16 x i1> %vin) {
; CHECK-LABEL: @v2i2v_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret <16 x i1> [[VIN:%.*]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %int)
  ret <16 x i1> %vout
}

; Conversions from a predicate vector to i32 and then to a _different_
; size of predicate vector should be left alone.

; NOTE: in the three tests below, the CHECK lines expect a `!range !0`
; annotation on the retained v2i call that is not present in the input
; IR, i.e. instcombine is expected to attach range metadata to the
; intrinsic's i32 result even when it cannot fold the pair away.

define <16 x i1> @v2i2v_4_16(<4 x i1> %vin) {
; CHECK-LABEL: @v2i2v_4_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[VOUT:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[INT]])
; CHECK-NEXT:    ret <16 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %int)
  ret <16 x i1> %vout
}

define <4 x i1> @v2i2v_8_4(<8 x i1> %vin) {
; CHECK-LABEL: @v2i2v_8_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[INT]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %int)
  ret <4 x i1> %vout
}

define <8 x i1> @v2i2v_16_8(<16 x i1> %vin) {
; CHECK-LABEL: @v2i2v_16_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INT:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[VOUT:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[INT]])
; CHECK-NEXT:    ret <8 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %int)
  ret <8 x i1> %vout
}

; Round-trip conversions from i32 to predicate vector back to i32
; should be eliminated.

define i32 @i2v2i_4(i32 %iin) {
; CHECK-LABEL: @i2v2i_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i32 [[IIN:%.*]]
;
entry:
  %vec = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %iin)
  %iout = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vec)
  ret i32 %iout
}

define i32 @i2v2i_8(i32 %iin) {
; CHECK-LABEL: @i2v2i_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i32 [[IIN:%.*]]
;
entry:
  %vec = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %iin)
  %iout = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vec)
  ret i32 %iout
}

define i32 @i2v2i_16(i32 %iin) {
; CHECK-LABEL: @i2v2i_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret i32 [[IIN:%.*]]
;
entry:
  %vec = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %iin)
  %iout = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vec)
  ret i32 %iout
}

; v2i leaves the top 16 bits clear. So a trunc/zext pair applied to
; its output, going via i16, can be completely eliminated - but not
; one going via i8. Similarly with other methods of clearing the top
; bits, like bitwise and.

define i32 @v2i_truncext_i16(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_truncext_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    ret i32 [[WIDE1]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %narrow = trunc i32 %wide1 to i16
  %wide2 = zext i16 %narrow to i32
  ret i32 %wide2
}

; Going via i8 only masks down to 8 bits, which v2i's result does not
; guarantee, so the expected output keeps an `and` with 255.
define i32 @v2i_truncext_i8(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_truncext_i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1]], 255
; CHECK-NEXT:    ret i32 [[WIDE2]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %narrow = trunc i32 %wide1 to i8
  %narrow2 = trunc i32 %wide1 to i8
  %wide2 = zext i8 %narrow to i32
  ret i32 %wide2
}

define i32 @v2i_and_16(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_and_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    ret i32 [[WIDE1]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %wide2 = and i32 %wide1, 65535
  ret i32 %wide2
}

; Masking with 32767 (15 bits) is narrower than the 16 bits v2i is
; known to produce, so the `and` must survive.
define i32 @v2i_and_15(<4 x i1> %vin) {
; CHECK-LABEL: @v2i_and_15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[VIN:%.*]]), !range !0
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1]], 32767
; CHECK-NEXT:    ret i32 [[WIDE2]]
;
entry:
  %wide1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %wide2 = and i32 %wide1, 32767
  ret i32 %wide2
}

; i2v doesn't use the top bits of its input. So the same operations
; on a value that's about to be passed to i2v can be eliminated.

define <4 x i1> @i2v_truncext_i16(i32 %wide1) {
; CHECK-LABEL: @i2v_truncext_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE1:%.*]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %narrow = trunc i32 %wide1 to i16
  %wide2 = zext i16 %narrow to i32
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}

; An i8 round trip also clears bits 8-15, which i2v does consume, so
; the expected output keeps the masking (as an `and` with 255).
define <4 x i1> @i2v_truncext_i8(i32 %wide1) {
; CHECK-LABEL: @i2v_truncext_i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1:%.*]], 255
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE2]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %narrow = trunc i32 %wide1 to i8
  %wide2 = zext i8 %narrow to i32
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}

define <4 x i1> @i2v_and_16(i32 %wide1) {
; CHECK-LABEL: @i2v_and_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE1:%.*]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %wide2 = and i32 %wide1, 65535
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}

; A 15-bit mask clears bit 15, which i2v consumes, so it must survive.
define <4 x i1> @i2v_and_15(i32 %wide1) {
; CHECK-LABEL: @i2v_and_15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[WIDE2:%.*]] = and i32 [[WIDE1:%.*]], 32767
; CHECK-NEXT:    [[VOUT:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[WIDE2]])
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %wide2 = and i32 %wide1, 32767
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %wide2)
  ret <4 x i1> %vout
}

; If a predicate vector is round-tripped to an integer and back, and
; complemented while it's in integer form, we should collapse that to
; a complement of the vector itself.
; (Rationale: this is likely to
; allow it to be code-generated as MVE VPNOT.)

define <4 x i1> @vpnot_4(<4 x i1> %vin) {
; CHECK-LABEL: @vpnot_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %flipped = xor i32 %int, 65535
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %flipped)
  ret <4 x i1> %vout
}

define <8 x i1> @vpnot_8(<8 x i1> %vin) {
; CHECK-LABEL: @vpnot_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <8 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %flipped = xor i32 %int, 65535
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %flipped)
  ret <8 x i1> %vout
}

define <16 x i1> @vpnot_16(<16 x i1> %vin) {
; CHECK-LABEL: @vpnot_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <16 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %flipped = xor i32 %int, 65535
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %flipped)
  ret <16 x i1> %vout
}

; And this still works even if the i32 is narrowed to i16 and back on
; opposite sides of the xor.

define <4 x i1> @vpnot_narrow_4(<4 x i1> %vin) {
; CHECK-LABEL: @vpnot_narrow_4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <4 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <4 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %vin)
  %narrow = trunc i32 %int to i16
  %flipped_narrow = xor i16 %narrow, -1
  %flipped = zext i16 %flipped_narrow to i32
  %vout = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %flipped)
  ret <4 x i1> %vout
}

define <8 x i1> @vpnot_narrow_8(<8 x i1> %vin) {
; CHECK-LABEL: @vpnot_narrow_8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <8 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <8 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> %vin)
  %narrow = trunc i32 %int to i16
  %flipped_narrow = xor i16 %narrow, -1
  %flipped = zext i16 %flipped_narrow to i32
  %vout = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %flipped)
  ret <8 x i1> %vout
}

define <16 x i1> @vpnot_narrow_16(<16 x i1> %vin) {
; CHECK-LABEL: @vpnot_narrow_16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VOUT:%.*]] = xor <16 x i1> [[VIN:%.*]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    ret <16 x i1> [[VOUT]]
;
entry:
  %int = call i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1> %vin)
  %narrow = trunc i32 %int to i16
  %flipped_narrow = xor i16 %narrow, -1
  %flipped = zext i16 %flipped_narrow to i32
  %vout = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %flipped)
  ret <16 x i1> %vout
}