; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m3 | FileCheck %s --check-prefixes=CHECK,EXYNOSM3

; Instruction-selection tests for AArch64 NEON "by element" (lane-indexed)
; operations. Each function splats or extracts one lane of %v and feeds it to
; a multiply / multiply-accumulate / fused multiply-add; the expected assembly
; uses the lane-indexed form of the instruction instead of a separate dup.
; Both RUN lines (generic tuning and -mcpu=exynos-m3) share the common prefix
; used by every assertion in this part of the file.

; NEON intrinsic declarations. Only smull is referenced by the functions in
; this part of the file; the others are presumably used further down
; (NOTE(review): confirm against the rest of the file).
declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; Integer multiply-accumulate: (splat(%v[lane]) * %b) + %a is expected to
; select the lane-indexed mla. "lane" variants index a 64-bit %v, "laneq"
; variants index a 128-bit %v.

define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

; Integer multiply-subtract: %a - (splat(%v[lane]) * %b) is expected to select
; the lane-indexed mls.

define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

; Integer multiply: splat(%v[lane]) * %a is expected to select the
; lane-indexed mul. The _s and _u variants use identical IR.

define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

; Vector fused multiply-add: llvm.fma(splat(%v[lane]), %b, %a) is expected to
; select the lane-indexed fmla.

define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; Vector fused multiply-subtract: %v is negated (fsub from -0.0) before the
; lane is splatted; the negation is expected to fold into a lane-indexed fmls.

define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; Double-precision vector variants of the fused multiply-add/subtract tests.

define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.2d, v1.2d, v2.d[0]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.2d, v1.2d, v2.d[1]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.2d, v1.2d, v2.d[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <1 x double> <double -0.000000e+00>, %v
  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.2d, v1.2d, v2.d[1]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; Scalar fused multiply-add/subtract with one operand extracted from a vector
; lane. These negate the extracted scalar. The <1 x double> source is expected
; to select the plain scalar fmsub rather than a lane-indexed form.

define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmas_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla s0, s1, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x float> %v, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

declare float @llvm.fma.f32(float, float, float)

define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d1, d2, d0
; CHECK-NEXT:    ret
entry:
  %extract.rhs = extractelement <1 x double> %v, i32 0
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

declare double @llvm.fma.f64(double, double, double)

define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmss_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls s0, s1, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %extract.rhs = extractelement <2 x float> %v, i32 1
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls s0, s1, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %extract.rhs = extractelement <4 x float> %v, i32 3
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls d0, d1, v2.d[1]
; CHECK-NEXT:    ret
entry:
  %extract.rhs = extractelement <2 x double> %v, i32 1
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

; "_0" variants: the whole vector is negated first and the lane is extracted
; afterwards; the expected assembly matches the variants above.

define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub d0, d1, d2, d0
; CHECK-NEXT:    ret
entry:
  %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
  %tmp1 = extractelement <1 x double> %tmp0, i32 0
  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %0
}

define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmss_lane_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls s0, s1, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %tmp1 = extractelement <2 x float> %tmp0, i32 1
  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %0
}

define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls s0, s1, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %tmp0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %tmp1 = extractelement <4 x float> %tmp0, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
  ret float %0
}

define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls d0, d1, v2.d[1]
; CHECK-NEXT:    ret
entry:
  %tmp0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %tmp1 = extractelement <2 x double> %tmp0, i32 1
  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
  ret double %0
}

; Signed widening multiply-accumulate: smull(%b, splat(%v[lane])) + %a is
; expected to select the lane-indexed smlal. The "high" variants take the
; upper half of a 128-bit %b and are expected to select smlal2.

define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; Signed widening multiply-subtract: %a - smull(%b, splat(%v[lane])) is
; expected to select the lane-indexed smlsl / smlsl2.

define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlsl2 v0.4s, v1.8h, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlsl2 v0.2d, v1.4s, v2.s[1]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlsl2 v0.4s, v1.8h, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlsl2 v0.2d, v1.4s, v2.s[3]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4
x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 897 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 898 %sub = sub <2 x i64> %a, %vmull2.i 899 ret <2 x i64> %sub 900} 901 902define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 903; CHECK-LABEL: test_vmlal_lane_u16: 904; CHECK: // %bb.0: // %entry 905; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 906; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[3] 907; CHECK-NEXT: ret 908entry: 909 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 910 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 911 %add = add <4 x i32> %vmull2.i, %a 912 ret <4 x i32> %add 913} 914 915define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 916; CHECK-LABEL: test_vmlal_lane_u32: 917; CHECK: // %bb.0: // %entry 918; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 919; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[1] 920; CHECK-NEXT: ret 921entry: 922 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 923 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 924 %add = add <2 x i64> %vmull2.i, %a 925 ret <2 x i64> %add 926} 927 928define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 929; CHECK-LABEL: test_vmlal_laneq_u16: 930; CHECK: // %bb.0: // %entry 931; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[7] 932; CHECK-NEXT: ret 933entry: 934 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 935 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 936 %add = add <4 x i32> %vmull2.i, %a 937 ret <4 x i32> %add 938} 939 940define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 941; CHECK-LABEL: test_vmlal_laneq_u32: 942; CHECK: // %bb.0: // %entry 943; 
CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[3] 944; CHECK-NEXT: ret 945entry: 946 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 947 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 948 %add = add <2 x i64> %vmull2.i, %a 949 ret <2 x i64> %add 950} 951 952define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 953; CHECK-LABEL: test_vmlal_high_lane_u16: 954; CHECK: // %bb.0: // %entry 955; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 956; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[3] 957; CHECK-NEXT: ret 958entry: 959 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 960 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 961 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 962 %add = add <4 x i32> %vmull2.i, %a 963 ret <4 x i32> %add 964} 965 966define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 967; CHECK-LABEL: test_vmlal_high_lane_u32: 968; CHECK: // %bb.0: // %entry 969; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 970; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[1] 971; CHECK-NEXT: ret 972entry: 973 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 974 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 975 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 976 %add = add <2 x i64> %vmull2.i, %a 977 ret <2 x i64> %add 978} 979 980define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 981; CHECK-LABEL: test_vmlal_high_laneq_u16: 982; CHECK: // %bb.0: // %entry 983; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[7] 984; CHECK-NEXT: ret 985entry: 986 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 
987 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 988 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 989 %add = add <4 x i32> %vmull2.i, %a 990 ret <4 x i32> %add 991} 992 993define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 994; CHECK-LABEL: test_vmlal_high_laneq_u32: 995; CHECK: // %bb.0: // %entry 996; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[3] 997; CHECK-NEXT: ret 998entry: 999 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1000 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1001 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1002 %add = add <2 x i64> %vmull2.i, %a 1003 ret <2 x i64> %add 1004} 1005 1006define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1007; CHECK-LABEL: test_vmlsl_lane_u16: 1008; CHECK: // %bb.0: // %entry 1009; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1010; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[3] 1011; CHECK-NEXT: ret 1012entry: 1013 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1014 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1015 %sub = sub <4 x i32> %a, %vmull2.i 1016 ret <4 x i32> %sub 1017} 1018 1019define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1020; CHECK-LABEL: test_vmlsl_lane_u32: 1021; CHECK: // %bb.0: // %entry 1022; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1023; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[1] 1024; CHECK-NEXT: ret 1025entry: 1026 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1027 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1028 %sub = sub <2 x i64> %a, %vmull2.i 1029 ret <2 x i64> %sub 
1030} 1031 1032define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 1033; CHECK-LABEL: test_vmlsl_laneq_u16: 1034; CHECK: // %bb.0: // %entry 1035; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[7] 1036; CHECK-NEXT: ret 1037entry: 1038 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1039 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1040 %sub = sub <4 x i32> %a, %vmull2.i 1041 ret <4 x i32> %sub 1042} 1043 1044define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 1045; CHECK-LABEL: test_vmlsl_laneq_u32: 1046; CHECK: // %bb.0: // %entry 1047; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[3] 1048; CHECK-NEXT: ret 1049entry: 1050 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1051 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1052 %sub = sub <2 x i64> %a, %vmull2.i 1053 ret <2 x i64> %sub 1054} 1055 1056define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1057; CHECK-LABEL: test_vmlsl_high_lane_u16: 1058; CHECK: // %bb.0: // %entry 1059; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1060; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[3] 1061; CHECK-NEXT: ret 1062entry: 1063 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1064 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1065 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1066 %sub = sub <4 x i32> %a, %vmull2.i 1067 ret <4 x i32> %sub 1068} 1069 1070define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1071; CHECK-LABEL: test_vmlsl_high_lane_u32: 1072; CHECK: // %bb.0: // %entry 1073; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1074; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[1] 1075; 
CHECK-NEXT: ret 1076entry: 1077 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1078 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1079 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1080 %sub = sub <2 x i64> %a, %vmull2.i 1081 ret <2 x i64> %sub 1082} 1083 1084define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 1085; CHECK-LABEL: test_vmlsl_high_laneq_u16: 1086; CHECK: // %bb.0: // %entry 1087; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[7] 1088; CHECK-NEXT: ret 1089entry: 1090 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1091 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1092 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1093 %sub = sub <4 x i32> %a, %vmull2.i 1094 ret <4 x i32> %sub 1095} 1096 1097define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 1098; CHECK-LABEL: test_vmlsl_high_laneq_u32: 1099; CHECK: // %bb.0: // %entry 1100; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[3] 1101; CHECK-NEXT: ret 1102entry: 1103 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1104 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1105 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1106 %sub = sub <2 x i64> %a, %vmull2.i 1107 ret <2 x i64> %sub 1108} 1109 1110define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1111; CHECK-LABEL: test_vmull_lane_s16: 1112; CHECK: // %bb.0: // %entry 1113; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1114; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[3] 1115; CHECK-NEXT: ret 1116entry: 1117 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, 
i32 3, i32 3> 1118 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1119 ret <4 x i32> %vmull2.i 1120} 1121 1122define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1123; CHECK-LABEL: test_vmull_lane_s32: 1124; CHECK: // %bb.0: // %entry 1125; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1126; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[1] 1127; CHECK-NEXT: ret 1128entry: 1129 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1130 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1131 ret <2 x i64> %vmull2.i 1132} 1133 1134define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { 1135; CHECK-LABEL: test_vmull_lane_u16: 1136; CHECK: // %bb.0: // %entry 1137; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1138; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[3] 1139; CHECK-NEXT: ret 1140entry: 1141 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1142 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1143 ret <4 x i32> %vmull2.i 1144} 1145 1146define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { 1147; CHECK-LABEL: test_vmull_lane_u32: 1148; CHECK: // %bb.0: // %entry 1149; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1150; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[1] 1151; CHECK-NEXT: ret 1152entry: 1153 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1154 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1155 ret <2 x i64> %vmull2.i 1156} 1157 1158define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1159; CHECK-LABEL: test_vmull_high_lane_s16: 1160; CHECK: // %bb.0: // %entry 1161; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1162; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[3] 1163; CHECK-NEXT: ret 1164entry: 1165 %shuffle.i = 
shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1166 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1167 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1168 ret <4 x i32> %vmull2.i 1169} 1170 1171define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1172; CHECK-LABEL: test_vmull_high_lane_s32: 1173; CHECK: // %bb.0: // %entry 1174; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1175; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[1] 1176; CHECK-NEXT: ret 1177entry: 1178 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1179 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1180 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1181 ret <2 x i64> %vmull2.i 1182} 1183 1184define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { 1185; CHECK-LABEL: test_vmull_high_lane_u16: 1186; CHECK: // %bb.0: // %entry 1187; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1188; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[3] 1189; CHECK-NEXT: ret 1190entry: 1191 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1192 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1193 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1194 ret <4 x i32> %vmull2.i 1195} 1196 1197define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { 1198; CHECK-LABEL: test_vmull_high_lane_u32: 1199; CHECK: // %bb.0: // %entry 1200; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1201; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[1] 1202; CHECK-NEXT: ret 1203entry: 1204 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1205 %shuffle = shufflevector <2 x 
i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1206 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1207 ret <2 x i64> %vmull2.i 1208} 1209 1210define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1211; CHECK-LABEL: test_vmull_laneq_s16: 1212; CHECK: // %bb.0: // %entry 1213; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[7] 1214; CHECK-NEXT: ret 1215entry: 1216 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1217 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1218 ret <4 x i32> %vmull2.i 1219} 1220 1221define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1222; CHECK-LABEL: test_vmull_laneq_s32: 1223; CHECK: // %bb.0: // %entry 1224; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[3] 1225; CHECK-NEXT: ret 1226entry: 1227 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1228 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1229 ret <2 x i64> %vmull2.i 1230} 1231 1232define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { 1233; CHECK-LABEL: test_vmull_laneq_u16: 1234; CHECK: // %bb.0: // %entry 1235; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[7] 1236; CHECK-NEXT: ret 1237entry: 1238 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1239 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1240 ret <4 x i32> %vmull2.i 1241} 1242 1243define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { 1244; CHECK-LABEL: test_vmull_laneq_u32: 1245; CHECK: // %bb.0: // %entry 1246; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[3] 1247; CHECK-NEXT: ret 1248entry: 1249 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1250 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> 
%shuffle) 1251 ret <2 x i64> %vmull2.i 1252} 1253 1254define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1255; CHECK-LABEL: test_vmull_high_laneq_s16: 1256; CHECK: // %bb.0: // %entry 1257; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[7] 1258; CHECK-NEXT: ret 1259entry: 1260 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1261 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1262 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1263 ret <4 x i32> %vmull2.i 1264} 1265 1266define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1267; CHECK-LABEL: test_vmull_high_laneq_s32: 1268; CHECK: // %bb.0: // %entry 1269; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[3] 1270; CHECK-NEXT: ret 1271entry: 1272 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1273 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1274 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1275 ret <2 x i64> %vmull2.i 1276} 1277 1278define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { 1279; CHECK-LABEL: test_vmull_high_laneq_u16: 1280; CHECK: // %bb.0: // %entry 1281; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[7] 1282; CHECK-NEXT: ret 1283entry: 1284 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1285 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1286 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1287 ret <4 x i32> %vmull2.i 1288} 1289 1290define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { 1291; CHECK-LABEL: test_vmull_high_laneq_u32: 1292; CHECK: // %bb.0: // %entry 1293; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[3] 1294; 
CHECK-NEXT: ret 1295entry: 1296 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1297 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1298 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1299 ret <2 x i64> %vmull2.i 1300} 1301 1302define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1303; CHECK-LABEL: test_vqdmlal_lane_s16: 1304; CHECK: // %bb.0: // %entry 1305; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1306; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3] 1307; CHECK-NEXT: ret 1308entry: 1309 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1310 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1311 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1312 ret <4 x i32> %vqdmlal4.i 1313} 1314 1315define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1316; CHECK-LABEL: test_vqdmlal_lane_s32: 1317; CHECK: // %bb.0: // %entry 1318; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1319; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1] 1320; CHECK-NEXT: ret 1321entry: 1322 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1323 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1324 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1325 ret <2 x i64> %vqdmlal4.i 1326} 1327 1328define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1329; CHECK-LABEL: test_vqdmlal_high_lane_s16: 1330; CHECK: // %bb.0: // %entry 1331; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1332; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3] 1333; CHECK-NEXT: ret 1334entry: 1335 %shuffle.i = shufflevector <8 x i16> %b, 
<8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1336 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1337 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1338 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1339 ret <4 x i32> %vqdmlal4.i 1340} 1341 1342define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1343; CHECK-LABEL: test_vqdmlal_high_lane_s32: 1344; CHECK: // %bb.0: // %entry 1345; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1346; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1] 1347; CHECK-NEXT: ret 1348entry: 1349 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1350 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1351 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1352 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1353 ret <2 x i64> %vqdmlal4.i 1354} 1355 1356define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1357; CHECK-LABEL: test_vqdmlsl_lane_s16: 1358; CHECK: // %bb.0: // %entry 1359; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1360; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3] 1361; CHECK-NEXT: ret 1362entry: 1363 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1364 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1365 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1366 ret <4 x i32> %vqdmlsl4.i 1367} 1368 1369define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1370; CHECK-LABEL: test_vqdmlsl_lane_s32: 1371; CHECK: // %bb.0: // %entry 1372; CHECK-NEXT: 
// kill: def $d2 killed $d2 def $q2 1373; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1] 1374; CHECK-NEXT: ret 1375entry: 1376 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1377 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1378 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1379 ret <2 x i64> %vqdmlsl4.i 1380} 1381 1382define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1383; CHECK-LABEL: test_vqdmlsl_high_lane_s16: 1384; CHECK: // %bb.0: // %entry 1385; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1386; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3] 1387; CHECK-NEXT: ret 1388entry: 1389 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1390 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1391 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1392 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1393 ret <4 x i32> %vqdmlsl4.i 1394} 1395 1396define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1397; CHECK-LABEL: test_vqdmlsl_high_lane_s32: 1398; CHECK: // %bb.0: // %entry 1399; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 1400; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1] 1401; CHECK-NEXT: ret 1402entry: 1403 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1404 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1405 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1406 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1407 ret <2 x i64> %vqdmlsl4.i 1408} 1409 1410define <4 x i32> 
@test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1411; CHECK-LABEL: test_vqdmull_lane_s16: 1412; CHECK: // %bb.0: // %entry 1413; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1414; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3] 1415; CHECK-NEXT: ret 1416entry: 1417 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1418 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1419 ret <4 x i32> %vqdmull2.i 1420} 1421 1422define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1423; CHECK-LABEL: test_vqdmull_lane_s32: 1424; CHECK: // %bb.0: // %entry 1425; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1426; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[1] 1427; CHECK-NEXT: ret 1428entry: 1429 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1430 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1431 ret <2 x i64> %vqdmull2.i 1432} 1433 1434define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1435; CHECK-LABEL: test_vqdmull_laneq_s16: 1436; CHECK: // %bb.0: // %entry 1437; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3] 1438; CHECK-NEXT: ret 1439entry: 1440 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1441 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1442 ret <4 x i32> %vqdmull2.i 1443} 1444 1445define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1446; CHECK-LABEL: test_vqdmull_laneq_s32: 1447; CHECK: // %bb.0: // %entry 1448; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[3] 1449; CHECK-NEXT: ret 1450entry: 1451 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1452 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1453 ret <2 x i64> %vqdmull2.i 1454} 1455 1456define <4 x i32> 
@test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqdmull2 v0.4s, v0.8h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; sqdmull on the upper half of %a with a splat of lane 1 of a 64-bit vector.
define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqdmull2 v0.2d, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %res
}

; sqdmull on the upper half of %a with a splat of lane 7 of a 128-bit vector.
define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sqdmull2 v0.4s, v0.8h, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %res = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  ret <4 x i32> %res
}

; sqdmull on the upper half of %a with a splat of lane 3 of a 128-bit vector.
define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sqdmull2 v0.2d, v0.4s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %hi, <2 x i32> %splat)
  ret <2 x i64> %res
}

; sqdmulh of a 4h vector with a splat of lane 3 of a 64-bit vector.
define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i16> %res
}

; sqdmulh of an 8h vector with a widened splat of lane 3 of a 64-bit vector.
define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %res = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %splat)
  ret <8 x i16> %res
}

; sqdmulh of a 2s vector with a splat of lane 1 of a 64-bit vector.
define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %splat)
  ret <2 x i32> %res
}

; sqdmulh of a 4s vector with a widened splat of lane 1 of a 64-bit vector.
define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %splat)
  ret <4 x i32> %res
}

; sqrdmulh of a 4h vector with a splat of lane 3 of a 64-bit vector.
define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %splat)
  ret <4 x i16> %res
}

; sqrdmulh of an 8h vector with a widened splat of lane 3 of a 64-bit vector.
define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %res = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %splat)
  ret <8 x i16> %res
}

; sqrdmulh of a 2s vector with a splat of lane 1 of a 64-bit vector.
define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %splat)
  ret <2 x i32> %res
}

; sqrdmulh of a 4s vector with a widened splat of lane 1 of a 64-bit vector.
define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %splat)
  ret <4 x i32> %res
}

; fmul of a 2s vector with a splat of lane 1 of a 64-bit vector.
define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x float> %splat, %a
  ret <2 x float> %prod
}

; Scalar fmul of two <1 x double> values; %a reaches the multiply through a
; pair of bitcasts and %v through an extractelement of lane 0.
define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmul_lane_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
entry:
  %a.bits = bitcast <1 x double> %a to <8 x i8>
  %a.f64 = bitcast <8 x i8> %a.bits to double
  %elt = extractelement <1 x double> %v, i32 0
  %prod = fmul double %a.f64, %elt
  %res = insertelement <1 x double> undef, double %prod, i32 0
  ret <1 x double> %res
}

; fmul of a 4s vector with a widened splat of lane 1 of a 64-bit vector.
define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %prod = fmul <4 x float> %splat, %a
  ret <4 x float> %prod
}

; fmul of a 2d vector with a splat of the single element of a <1 x double>.
define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulq_lane_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %prod = fmul <2 x double> %splat, %a
  ret <2 x double> %prod
}

; fmul of a 2s vector with a narrowed splat of lane 3 of a 128-bit vector.
define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %prod = fmul <2 x float> %splat, %a
  ret <2 x float> %prod
}

; Splat of float lane 1 taken from the upper double of %v (via bitcast); the
; expected output indexes s[3] of the original register directly.
define <2 x float> @test_vmul_laneq3_f32_bitcast(<2 x float> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq3_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[3]
; CHECK-NEXT:    ret
  %upper = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
  %cast = bitcast <1 x double> %upper to <2 x float>
  %dup = shufflevector <2 x float> %cast, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x float> %dup, %a
  ret <2 x float> %prod
}

; Splat of float lane 0 taken from the upper double of %v (via bitcast); the
; expected output indexes s[2] of the original register directly.
define <2 x float> @test_vmul_laneq2_f32_bitcast(<2 x float> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq2_f32_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[2]
; CHECK-NEXT:    ret
  %upper = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
  %cast = bitcast <1 x double> %upper to <2 x float>
  %dup = shufflevector <2 x float> %cast, <2 x float> undef, <2 x i32> <i32 0, i32 0>
  %prod = fmul <2 x float> %dup, %a
  ret <2 x float> %prod
}

; Splat of i16 lane 1 taken from the upper double of %v (via bitcast); the
; expected output is a dup of h[5] of the original register followed by add.
define <4 x i16> @test_vadd_laneq5_i16_bitcast(<4 x i16> %a, <2 x double> %v) {
; CHECK-LABEL: test_vadd_laneq5_i16_bitcast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v1.4h, v1.h[5]
; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
; CHECK-NEXT:    ret
  %upper = shufflevector <2 x double> %v, <2 x double> undef, <1 x i32> <i32 1>
  %cast = bitcast <1 x double> %upper to <4 x i16>
  %dup = shufflevector <4 x i16> %cast, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %sum = add <4 x i16> %dup, %a
  ret <4 x i16> %sum
}

; TODO: The pattern in LowerVECTOR_SHUFFLE does not match what we are looking for.

; Eight bytes starting at byte 2 of %v are reinterpreted as 4 x i16 and lane 1
; is splatted; the expected output still contains an ext before the dup.
define <4 x i16> @test_vadd_lane2_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
; CHECK-LABEL: test_vadd_lane2_i16_bitcast_bigger_aligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.8b, v1.8b, v0.8b, #2
; CHECK-NEXT:    dup v1.4h, v1.h[1]
; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
; CHECK-NEXT:    ret
  %bytes = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
  %cast = bitcast <8 x i8> %bytes to <4 x i16>
  %dup = shufflevector <4 x i16> %cast, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %sum = add <4 x i16> %dup, %a
  ret <4 x i16> %sum
}

; The upper eight bytes of %v are reinterpreted as 4 x i16 and lane 1 is
; splatted; the expected output is a dup of h[5] of the original register.
define <4 x i16> @test_vadd_lane5_i16_bitcast_bigger_aligned(<4 x i16> %a, <16 x i8> %v) {
; CHECK-LABEL: test_vadd_lane5_i16_bitcast_bigger_aligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v1.4h, v1.h[5]
; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
; CHECK-NEXT:    ret
  %bytes = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %cast = bitcast <8 x i8> %bytes to <4 x i16>
  %dup = shufflevector <4 x i16> %cast, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %sum = add <4 x i16> %dup, %a
  ret <4 x i16> %sum
}

; Negative test - can't dup bytes {3,4} of v8i16.

; Eight bytes starting at byte 1 of %v are reinterpreted as 4 x i16 and lane 1
; is splatted; the expected output keeps the ext ahead of the dup.
define <4 x i16> @test_vadd_lane_i16_bitcast_bigger_unaligned(<4 x i16> %a, <16 x i8> %v) {
; CHECK-LABEL: test_vadd_lane_i16_bitcast_bigger_unaligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.8b, v1.8b, v0.8b, #1
; CHECK-NEXT:    dup v1.4h, v1.h[1]
; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
; CHECK-NEXT:    ret
  %bytes = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %cast = bitcast <8 x i8> %bytes to <4 x i16>
  %dup = shufflevector <4 x i16> %cast, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %sum = add <4 x i16> %dup, %a
  ret <4 x i16> %sum
}

; Scalar fmul of a <1 x double> with element 1 extracted from a 128-bit vector.
define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul d0, d0, v1.d[1]
; CHECK-NEXT:    ret
entry:
  %a.bits = bitcast <1 x double> %a to <8 x i8>
  %a.f64 = bitcast <8 x i8> %a.bits to double
  %elt = extractelement <2 x double> %v, i32 1
  %prod = fmul double %a.f64, %elt
  %res = insertelement <1 x double> undef, double %prod, i32 0
  ret <1 x double> %res
}

; fmul of a 4s vector with a splat of lane 3 of a 128-bit vector.
define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %prod = fmul <4 x float> %splat, %a
  ret <4 x float> %prod
}

; fmul of a 2d vector with a splat of lane 1 of a 128-bit vector.
define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %prod = fmul <2 x double> %splat, %a
  ret <2 x double> %prod
}

; fmulx of a 2s vector with a splat of lane 1 of a 64-bit vector.
define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %splat)
  ret <2 x float> %res
}

; fmulx of a 4s vector with a widened splat of lane 1 of a 64-bit vector.
define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %splat)
  ret <4 x float> %res
}

; fmulx of a 2d vector with a splat of the single element of a <1 x double>.
define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.d[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %splat)
  ret <2 x double> %res
}

; fmulx of a 2s vector with a narrowed splat of lane 3 of a 128-bit vector.
define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %res = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %splat)
  ret <2 x float> %res
}

; fmulx of a 4s vector with a splat of lane 3 of a 128-bit vector.
define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.s[3]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %splat)
  ret <4 x float> %res
}

; fmulx of a 2d vector with a splat of lane 1 of a 128-bit vector.
define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.d[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %res = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %splat)
  ret <2 x double> %res
}

; Multiply-accumulate (4h) with a splat of lane 0 of a 64-bit vector.
define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}

; Multiply-accumulate (8h) with a widened splat of lane 0 of a 64-bit vector.
define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}

; Multiply-accumulate (2s) with a splat of lane 0 of a 64-bit vector.
define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}

; Multiply-accumulate (4s) with a widened splat of lane 0 of a 64-bit vector.
define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

; Multiply-accumulate (4h) with a narrowed splat of lane 0 of a 128-bit vector.
define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %b
  %acc = add <4 x i16> %prod, %a
  ret <4 x i16> %acc
}

; Multiply-accumulate (8h) with a splat of lane 0 of a 128-bit vector.
define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = add <8 x i16> %prod, %a
  ret <8 x i16> %acc
}

; Multiply-accumulate (2s) with a narrowed splat of lane 0 of a 128-bit vector.
define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %b
  %acc = add <2 x i32> %prod, %a
  ret <2 x i32> %acc
}

; Multiply-accumulate (4s) with a splat of lane 0 of a 128-bit vector.
define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

; Multiply-subtract (4h) with a splat of lane 0 of a 64-bit vector.
define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %b
  %acc = sub <4 x i16> %a, %prod
  ret <4 x i16> %acc
}

; Multiply-subtract (8h) with a widened splat of lane 0 of a 64-bit vector.
define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = sub <8 x i16> %a, %prod
  ret <8 x i16> %acc
}

; Multiply-subtract (2s) with a splat of lane 0 of a 64-bit vector.
define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %b
  %acc = sub <2 x i32> %a, %prod
  ret <2 x i32> %acc
}

; Multiply-subtract (4s) with a widened splat of lane 0 of a 64-bit vector.
define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %b
  %acc = sub <4 x i32> %a, %prod
  ret <4 x i32> %acc
}

; Multiply-subtract (4h) with a narrowed splat of lane 0 of a 128-bit vector.
define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %b
  %acc = sub <4 x i16> %a, %prod
  ret <4 x i16> %acc
}

; Multiply-subtract (8h) with a splat of lane 0 of a 128-bit vector.
define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = sub <8 x i16> %a, %prod
  ret <8 x i16> %acc
}

; Multiply-subtract (2s) with a narrowed splat of lane 0 of a 128-bit vector.
define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %b
  %acc = sub <2 x i32> %a, %prod
  ret <2 x i32> %acc
}

; Multiply-subtract (4s) with a splat of lane 0 of a 128-bit vector.
define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %b
  %acc = sub <4 x i32> %a, %prod
  ret <4 x i32> %acc
}

; Integer multiply (4h) by a splat of lane 0 of a 64-bit vector.
define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; Integer multiply (8h) by a widened splat of lane 0 of a 64-bit vector.
define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

; Integer multiply (2s) by a splat of lane 0 of a 64-bit vector.
define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

; Integer multiply (4s) by a widened splat of lane 0 of a 64-bit vector.
define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; Same IR as test_vmul_lane_s16_0, under the u16 test name.
define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; Same IR as test_vmulq_lane_s16_0, under the u16 test name.
define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

; Same IR as test_vmul_lane_s32_0, under the u32 test name.
define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

; Same IR as test_vmulq_lane_s32_0, under the u32 test name.
define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; Integer multiply (4h) by a narrowed splat of lane 0 of a 128-bit vector.
define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; Integer multiply (8h) by a splat of lane 0 of a 128-bit vector.
define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

; Integer multiply (2s) by a narrowed splat of lane 0 of a 128-bit vector.
define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

; Integer multiply (4s) by a splat of lane 0 of a 128-bit vector.
define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; Same IR as test_vmul_laneq_s16_0, under the u16 test name.
define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; Same IR as test_vmulq_laneq_s16_0, under the u16 test name.
define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

; Same IR as test_vmul_laneq_s32_0, under the u32 test name.
define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

; Same IR as test_vmulq_laneq_s32_0, under the u32 test name.
define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; llvm.fma (2s) whose first operand is a splat of lane 0 of a 64-bit vector.
define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

; llvm.fma (4s) whose first operand is a widened splat of lane 0 of a 64-bit vector.
define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

; llvm.fma (2s) whose first operand is a narrowed splat of lane 0 of a 128-bit vector.
define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

; llvm.fma (4s) whose first operand is a splat of lane 0 of a 128-bit vector.
define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

; llvm.fma (2s) on a lane-0 splat of (-0.0 - %v); the negation is expected to
; fold into fmls.
define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

; llvm.fma (4s) on a widened lane-0 splat of (-0.0 - %v); expected to select fmls.
define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

; llvm.fma (2s) on a narrowed lane-0 splat of (-0.0 - %v); expected to select fmls.
define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

; llvm.fma (4s) on a lane-0 splat of (-0.0 - %v) from a 128-bit vector; expected
; to select fmls.
define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

; llvm.fma (2d) whose first operand is a splat of lane 0 of a 128-bit vector.
define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.2d, v1.2d, v2.d[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %res
}

; llvm.fma (2d) on a lane-0 splat of (-0.0 - %v); expected to select fmls.
define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.2d, v1.2d, v2.d[0]
; CHECK-NEXT:    ret
entry:
  %neg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %splat = shufflevector <2 x double> %neg, <2 x double> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %splat, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %res
}

; smull by a lane-0 splat of a 64-bit vector, then add into %a (4s result).
define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}

; smull by a lane-0 splat of a 64-bit vector, then add into %a (2d result).
define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}

; smull by a narrowed lane-0 splat of a 128-bit vector, then add into %a (4s result).
define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}

; smull by a narrowed lane-0 splat of a 128-bit vector, then add into %a (2d result).
define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %wide = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %splat)
  %acc = add <2 x i64> %wide, %a
  ret <2 x i64> %acc
}

; smull of the upper half of %b by a lane-0 splat, then add into %a; expected
; to select smlal2.
define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %wide = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %hi, <4 x i16> %splat)
  %acc = add <4 x i32> %wide, %a
  ret <4 x i32> %acc
}

define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[0]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail
call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2427 %add = add <2 x i64> %vmull2.i, %a 2428 ret <2 x i64> %add 2429} 2430 2431define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2432; CHECK-LABEL: test_vmlal_high_laneq_s16_0: 2433; CHECK: // %bb.0: // %entry 2434; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0] 2435; CHECK-NEXT: ret 2436entry: 2437 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2438 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2439 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2440 %add = add <4 x i32> %vmull2.i, %a 2441 ret <4 x i32> %add 2442} 2443 2444define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2445; CHECK-LABEL: test_vmlal_high_laneq_s32_0: 2446; CHECK: // %bb.0: // %entry 2447; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0] 2448; CHECK-NEXT: ret 2449entry: 2450 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2451 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2452 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2453 %add = add <2 x i64> %vmull2.i, %a 2454 ret <2 x i64> %add 2455} 2456 2457define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2458; CHECK-LABEL: test_vmlsl_lane_s16_0: 2459; CHECK: // %bb.0: // %entry 2460; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2461; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0] 2462; CHECK-NEXT: ret 2463entry: 2464 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2465 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2466 %sub = sub <4 x i32> %a, %vmull2.i 2467 ret <4 x i32> %sub 2468} 2469 2470define <2 x i64> 
@test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2471; CHECK-LABEL: test_vmlsl_lane_s32_0: 2472; CHECK: // %bb.0: // %entry 2473; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2474; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0] 2475; CHECK-NEXT: ret 2476entry: 2477 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2478 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2479 %sub = sub <2 x i64> %a, %vmull2.i 2480 ret <2 x i64> %sub 2481} 2482 2483define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2484; CHECK-LABEL: test_vmlsl_laneq_s16_0: 2485; CHECK: // %bb.0: // %entry 2486; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0] 2487; CHECK-NEXT: ret 2488entry: 2489 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2490 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2491 %sub = sub <4 x i32> %a, %vmull2.i 2492 ret <4 x i32> %sub 2493} 2494 2495define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2496; CHECK-LABEL: test_vmlsl_laneq_s32_0: 2497; CHECK: // %bb.0: // %entry 2498; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0] 2499; CHECK-NEXT: ret 2500entry: 2501 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2502 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2503 %sub = sub <2 x i64> %a, %vmull2.i 2504 ret <2 x i64> %sub 2505} 2506 2507define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2508; CHECK-LABEL: test_vmlsl_high_lane_s16_0: 2509; CHECK: // %bb.0: // %entry 2510; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2511; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0] 2512; CHECK-NEXT: ret 2513entry: 2514 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2515 %shuffle = shufflevector <4 x 
i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2516 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2517 %sub = sub <4 x i32> %a, %vmull2.i 2518 ret <4 x i32> %sub 2519} 2520 2521define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2522; CHECK-LABEL: test_vmlsl_high_lane_s32_0: 2523; CHECK: // %bb.0: // %entry 2524; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2525; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0] 2526; CHECK-NEXT: ret 2527entry: 2528 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2529 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2530 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2531 %sub = sub <2 x i64> %a, %vmull2.i 2532 ret <2 x i64> %sub 2533} 2534 2535define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2536; CHECK-LABEL: test_vmlsl_high_laneq_s16_0: 2537; CHECK: // %bb.0: // %entry 2538; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0] 2539; CHECK-NEXT: ret 2540entry: 2541 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2542 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2543 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2544 %sub = sub <4 x i32> %a, %vmull2.i 2545 ret <4 x i32> %sub 2546} 2547 2548define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2549; CHECK-LABEL: test_vmlsl_high_laneq_s32_0: 2550; CHECK: // %bb.0: // %entry 2551; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0] 2552; CHECK-NEXT: ret 2553entry: 2554 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2555 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2556 %vmull2.i = tail call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2557 %sub = sub <2 x i64> %a, %vmull2.i 2558 ret <2 x i64> %sub 2559} 2560 2561define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2562; CHECK-LABEL: test_vmlal_lane_u16_0: 2563; CHECK: // %bb.0: // %entry 2564; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2565; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0] 2566; CHECK-NEXT: ret 2567entry: 2568 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2569 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2570 %add = add <4 x i32> %vmull2.i, %a 2571 ret <4 x i32> %add 2572} 2573 2574define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2575; CHECK-LABEL: test_vmlal_lane_u32_0: 2576; CHECK: // %bb.0: // %entry 2577; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2578; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0] 2579; CHECK-NEXT: ret 2580entry: 2581 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2582 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2583 %add = add <2 x i64> %vmull2.i, %a 2584 ret <2 x i64> %add 2585} 2586 2587define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2588; CHECK-LABEL: test_vmlal_laneq_u16_0: 2589; CHECK: // %bb.0: // %entry 2590; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0] 2591; CHECK-NEXT: ret 2592entry: 2593 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2594 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2595 %add = add <4 x i32> %vmull2.i, %a 2596 ret <4 x i32> %add 2597} 2598 2599define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2600; CHECK-LABEL: test_vmlal_laneq_u32_0: 2601; CHECK: // %bb.0: // %entry 2602; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0] 2603; 
CHECK-NEXT: ret 2604entry: 2605 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2606 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2607 %add = add <2 x i64> %vmull2.i, %a 2608 ret <2 x i64> %add 2609} 2610 2611define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2612; CHECK-LABEL: test_vmlal_high_lane_u16_0: 2613; CHECK: // %bb.0: // %entry 2614; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2615; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0] 2616; CHECK-NEXT: ret 2617entry: 2618 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2619 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2620 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2621 %add = add <4 x i32> %vmull2.i, %a 2622 ret <4 x i32> %add 2623} 2624 2625define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2626; CHECK-LABEL: test_vmlal_high_lane_u32_0: 2627; CHECK: // %bb.0: // %entry 2628; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2629; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0] 2630; CHECK-NEXT: ret 2631entry: 2632 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2633 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2634 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2635 %add = add <2 x i64> %vmull2.i, %a 2636 ret <2 x i64> %add 2637} 2638 2639define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2640; CHECK-LABEL: test_vmlal_high_laneq_u16_0: 2641; CHECK: // %bb.0: // %entry 2642; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0] 2643; CHECK-NEXT: ret 2644entry: 2645 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 
2646 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2647 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2648 %add = add <4 x i32> %vmull2.i, %a 2649 ret <4 x i32> %add 2650} 2651 2652define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2653; CHECK-LABEL: test_vmlal_high_laneq_u32_0: 2654; CHECK: // %bb.0: // %entry 2655; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0] 2656; CHECK-NEXT: ret 2657entry: 2658 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2659 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2660 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2661 %add = add <2 x i64> %vmull2.i, %a 2662 ret <2 x i64> %add 2663} 2664 2665define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2666; CHECK-LABEL: test_vmlsl_lane_u16_0: 2667; CHECK: // %bb.0: // %entry 2668; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2669; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0] 2670; CHECK-NEXT: ret 2671entry: 2672 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2673 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2674 %sub = sub <4 x i32> %a, %vmull2.i 2675 ret <4 x i32> %sub 2676} 2677 2678define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2679; CHECK-LABEL: test_vmlsl_lane_u32_0: 2680; CHECK: // %bb.0: // %entry 2681; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2682; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0] 2683; CHECK-NEXT: ret 2684entry: 2685 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2686 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2687 %sub = sub <2 x i64> %a, %vmull2.i 2688 ret <2 x i64> %sub 
2689} 2690 2691define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { 2692; CHECK-LABEL: test_vmlsl_laneq_u16_0: 2693; CHECK: // %bb.0: // %entry 2694; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0] 2695; CHECK-NEXT: ret 2696entry: 2697 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2698 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2699 %sub = sub <4 x i32> %a, %vmull2.i 2700 ret <4 x i32> %sub 2701} 2702 2703define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { 2704; CHECK-LABEL: test_vmlsl_laneq_u32_0: 2705; CHECK: // %bb.0: // %entry 2706; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0] 2707; CHECK-NEXT: ret 2708entry: 2709 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2710 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2711 %sub = sub <2 x i64> %a, %vmull2.i 2712 ret <2 x i64> %sub 2713} 2714 2715define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2716; CHECK-LABEL: test_vmlsl_high_lane_u16_0: 2717; CHECK: // %bb.0: // %entry 2718; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2719; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0] 2720; CHECK-NEXT: ret 2721entry: 2722 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2723 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2724 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2725 %sub = sub <4 x i32> %a, %vmull2.i 2726 ret <4 x i32> %sub 2727} 2728 2729define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2730; CHECK-LABEL: test_vmlsl_high_lane_u32_0: 2731; CHECK: // %bb.0: // %entry 2732; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2733; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0] 2734; CHECK-NEXT: 
ret 2735entry: 2736 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2737 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2738 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2739 %sub = sub <2 x i64> %a, %vmull2.i 2740 ret <2 x i64> %sub 2741} 2742 2743define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 2744; CHECK-LABEL: test_vmlsl_high_laneq_u16_0: 2745; CHECK: // %bb.0: // %entry 2746; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0] 2747; CHECK-NEXT: ret 2748entry: 2749 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2750 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2751 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2752 %sub = sub <4 x i32> %a, %vmull2.i 2753 ret <4 x i32> %sub 2754} 2755 2756define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 2757; CHECK-LABEL: test_vmlsl_high_laneq_u32_0: 2758; CHECK: // %bb.0: // %entry 2759; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0] 2760; CHECK-NEXT: ret 2761entry: 2762 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2763 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2764 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2765 %sub = sub <2 x i64> %a, %vmull2.i 2766 ret <2 x i64> %sub 2767} 2768 2769define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2770; CHECK-LABEL: test_vmull_lane_s16_0: 2771; CHECK: // %bb.0: // %entry 2772; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2773; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0] 2774; CHECK-NEXT: ret 2775entry: 2776 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2777 
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2778 ret <4 x i32> %vmull2.i 2779} 2780 2781define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2782; CHECK-LABEL: test_vmull_lane_s32_0: 2783; CHECK: // %bb.0: // %entry 2784; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2785; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0] 2786; CHECK-NEXT: ret 2787entry: 2788 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2789 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2790 ret <2 x i64> %vmull2.i 2791} 2792 2793define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { 2794; CHECK-LABEL: test_vmull_lane_u16_0: 2795; CHECK: // %bb.0: // %entry 2796; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2797; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0] 2798; CHECK-NEXT: ret 2799entry: 2800 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2801 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2802 ret <4 x i32> %vmull2.i 2803} 2804 2805define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { 2806; CHECK-LABEL: test_vmull_lane_u32_0: 2807; CHECK: // %bb.0: // %entry 2808; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2809; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0] 2810; CHECK-NEXT: ret 2811entry: 2812 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2813 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2814 ret <2 x i64> %vmull2.i 2815} 2816 2817define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2818; CHECK-LABEL: test_vmull_high_lane_s16_0: 2819; CHECK: // %bb.0: // %entry 2820; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2821; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0] 2822; CHECK-NEXT: ret 2823entry: 2824 %shuffle.i = shufflevector <8 x 
i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2825 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2826 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2827 ret <4 x i32> %vmull2.i 2828} 2829 2830define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2831; CHECK-LABEL: test_vmull_high_lane_s32_0: 2832; CHECK: // %bb.0: // %entry 2833; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2834; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0] 2835; CHECK-NEXT: ret 2836entry: 2837 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2838 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2839 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2840 ret <2 x i64> %vmull2.i 2841} 2842 2843define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { 2844; CHECK-LABEL: test_vmull_high_lane_u16_0: 2845; CHECK: // %bb.0: // %entry 2846; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2847; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0] 2848; CHECK-NEXT: ret 2849entry: 2850 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2851 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2852 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2853 ret <4 x i32> %vmull2.i 2854} 2855 2856define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { 2857; CHECK-LABEL: test_vmull_high_lane_u32_0: 2858; CHECK: // %bb.0: // %entry 2859; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 2860; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0] 2861; CHECK-NEXT: ret 2862entry: 2863 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2864 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x 
i32> zeroinitializer 2865 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2866 ret <2 x i64> %vmull2.i 2867} 2868 2869define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 2870; CHECK-LABEL: test_vmull_laneq_s16_0: 2871; CHECK: // %bb.0: // %entry 2872; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0] 2873; CHECK-NEXT: ret 2874entry: 2875 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2876 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2877 ret <4 x i32> %vmull2.i 2878} 2879 2880define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 2881; CHECK-LABEL: test_vmull_laneq_s32_0: 2882; CHECK: // %bb.0: // %entry 2883; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0] 2884; CHECK-NEXT: ret 2885entry: 2886 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2887 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2888 ret <2 x i64> %vmull2.i 2889} 2890 2891define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { 2892; CHECK-LABEL: test_vmull_laneq_u16_0: 2893; CHECK: // %bb.0: // %entry 2894; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0] 2895; CHECK-NEXT: ret 2896entry: 2897 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2898 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2899 ret <4 x i32> %vmull2.i 2900} 2901 2902define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { 2903; CHECK-LABEL: test_vmull_laneq_u32_0: 2904; CHECK: // %bb.0: // %entry 2905; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0] 2906; CHECK-NEXT: ret 2907entry: 2908 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2909 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2910 ret <2 x i64> %vmull2.i 
2911} 2912 2913define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 2914; CHECK-LABEL: test_vmull_high_laneq_s16_0: 2915; CHECK: // %bb.0: // %entry 2916; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0] 2917; CHECK-NEXT: ret 2918entry: 2919 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2920 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2921 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2922 ret <4 x i32> %vmull2.i 2923} 2924 2925define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 2926; CHECK-LABEL: test_vmull_high_laneq_s32_0: 2927; CHECK: // %bb.0: // %entry 2928; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0] 2929; CHECK-NEXT: ret 2930entry: 2931 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2932 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2933 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2934 ret <2 x i64> %vmull2.i 2935} 2936 2937define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { 2938; CHECK-LABEL: test_vmull_high_laneq_u16_0: 2939; CHECK: // %bb.0: // %entry 2940; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0] 2941; CHECK-NEXT: ret 2942entry: 2943 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2944 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2945 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2946 ret <4 x i32> %vmull2.i 2947} 2948 2949define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { 2950; CHECK-LABEL: test_vmull_high_laneq_u32_0: 2951; CHECK: // %bb.0: // %entry 2952; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0] 2953; CHECK-NEXT: ret 2954entry: 2955 %shuffle.i = 
shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2956 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2957 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2958 ret <2 x i64> %vmull2.i 2959} 2960 2961define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2962; CHECK-LABEL: test_vqdmlal_lane_s16_0: 2963; CHECK: // %bb.0: // %entry 2964; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2965; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0] 2966; CHECK-NEXT: ret 2967entry: 2968 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2969 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2970 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 2971 ret <4 x i32> %vqdmlal4.i 2972} 2973 2974define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2975; CHECK-LABEL: test_vqdmlal_lane_s32_0: 2976; CHECK: // %bb.0: // %entry 2977; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2978; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0] 2979; CHECK-NEXT: ret 2980entry: 2981 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2982 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2983 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 2984 ret <2 x i64> %vqdmlal4.i 2985} 2986 2987define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2988; CHECK-LABEL: test_vqdmlal_high_lane_s16_0: 2989; CHECK: // %bb.0: // %entry 2990; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 2991; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0] 2992; CHECK-NEXT: ret 2993entry: 2994 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 
6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i)
  ret <4 x i32> %vqdmlal4.i
}

; Extracts elements 2-3 of %b, splats element 0 of %v, and chains the sqdmull
; and sqadd intrinsics (accumulator %a). The expected assembly is a single
; sqdmlal2 whose last operand is the indexed lane v2.s[0].
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i)
  ret <2 x i64> %vqdmlal4.i
}

; Splats element 0 of %v, multiplies it with %b through the sqdmull intrinsic
; and passes %a and the product to the sqsub intrinsic. The expected assembly
; is a single sqdmlsl reading lane v2.h[0].
define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

; 32-bit-element variant of the test above: sqdmull on %b and a lane-0 splat
; of %v, then sqsub with %a. The expected assembly is a single sqdmlsl reading
; lane v2.s[0].
define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Extracts elements 4-7 of %b, splats element 0 of %v, and chains the sqdmull
; and sqsub intrinsics (accumulator %a). The expected assembly is a single
; sqdmlsl2 reading lane v2.h[0].
define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i)
  ret <4 x i32> %vqdmlsl4.i
}

; Extracts elements 2-3 of %b, splats element 0 of %v, and chains the sqdmull
; and sqsub intrinsics (accumulator %a). The expected assembly is a single
; sqdmlsl2 reading lane v2.s[0].
define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i)
  ret <2 x i64> %vqdmlsl4.i
}

; Calls the sqdmull intrinsic on %a and a lane-0 splat of the 4-element %v.
; The expected assembly is a single sqdmull reading lane v1.h[0].
define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Calls the sqdmull intrinsic on %a and a lane-0 splat of the 2-element %v.
; The expected assembly is a single sqdmull reading lane v1.s[0].
define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Same as test_vqdmull_lane_s16_0 but %v is an 8-element vector; the shuffle
; narrows it to a 4-element splat of element 0. No register "kill" line is
; expected before the sqdmull here.
define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Same as test_vqdmull_lane_s32_0 but %v is a 4-element vector; the shuffle
; narrows it to a 2-element splat of element 0.
define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_laneq_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Extracts elements 4-7 of %a and calls sqdmull with a lane-0 splat of the
; 4-element %v. The expected assembly is a single sqdmull2 reading v1.h[0].
define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Extracts elements 2-3 of %a and calls sqdmull with a lane-0 splat of the
; 2-element %v. The expected assembly is a single sqdmull2 reading v1.s[0].
define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Same as test_vqdmull_high_lane_s16_0 but %v is an 8-element vector that the
; shuffle narrows to a 4-element splat of element 0.
define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  ret <4 x i32> %vqdmull2.i
}

; Same as test_vqdmull_high_lane_s32_0 but %v is a 4-element vector that the
; shuffle narrows to a 2-element splat of element 0.
define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vqdmull_high_laneq_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  ret <2 x i64> %vqdmull2.i
}

; Calls the sqdmulh intrinsic on %a and a lane-0 splat of %v (4 x i16).
; The expected assembly is a single sqdmulh reading lane v1.h[0].
define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqdmulh2.i
}

; 8 x i16 variant: the shuffle widens the 4-element %v into an 8-element splat
; of element 0 before the sqdmulh intrinsic call.
define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqdmulh2.i
}

; Calls the sqdmulh intrinsic on %a and a lane-0 splat of %v (2 x i32).
; The expected assembly is a single sqdmulh reading lane v1.s[0].
define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulh_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqdmulh2.i
}

; 4 x i32 variant: the shuffle widens the 2-element %v into a 4-element splat
; of element 0 before the sqdmulh intrinsic call.
define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqdmulh2.i
}

; Calls the sqrdmulh intrinsic on %a and a lane-0 splat of %v (4 x i16).
; The expected assembly is a single sqrdmulh reading lane v1.h[0].
define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

; 8 x i16 variant: the shuffle widens the 4-element %v into an 8-element splat
; of element 0 before the sqrdmulh intrinsic call.
define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

; Calls the sqrdmulh intrinsic on %a and a lane-0 splat of %v (2 x i32).
; The expected assembly is a single sqrdmulh reading lane v1.s[0].
define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

; 4 x i32 variant: the shuffle widens the 2-element %v into a 4-element splat
; of element 0 before the sqrdmulh intrinsic call.
define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; Plain fmul of a lane-0 splat of %v with %a (2 x float). The expected
; assembly is a single fmul reading lane v1.s[0].
define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; 4 x float variant: the 2-element %v is widened to a 4-element splat of
; element 0 and multiplied with %a.
define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

; 2 x float result with a 4-element %v: the shuffle narrows %v to a 2-element
; splat of element 0, which is multiplied with %a.
define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Scalar form: %a is bitcast (via <8 x i8>) to a double, multiplied with
; element 0 extracted from %v, and re-inserted into a <1 x double>. The
; expected assembly is a scalar fmul reading lane v1.d[0].
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul d0, d0, v1.d[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

; fmul of a 4-element lane-0 splat of %v with %a (both 4 x float).
define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

; fmul of a 2-element lane-0 splat of %v with %a (both 2 x double).
define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; Calls the fmulx intrinsic on %a and a lane-0 splat of %v (2 x float).
; The expected assembly is a single fmulx reading lane v1.s[0].
define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

; 4 x float variant: the 2-element %v is widened to a 4-element splat of
; element 0 before the fmulx intrinsic call.
define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

; 2 x double variant: the single-element %v is widened to a 2-element splat
; before the fmulx intrinsic call. The expected assembly reads lane v1.d[0].
define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

; 2 x float result with a 4-element %v: the shuffle narrows %v to a 2-element
; splat of element 0 before the fmulx intrinsic call.
define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

; fmulx intrinsic on %a and a 4-element lane-0 splat of %v (both 4 x float).
define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

; fmulx intrinsic on %a and a 2-element lane-0 splat of %v (both 2 x double).
define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64_0:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

; Two identical splats of element 3 of %v: one feeds llvm.fma (with %b and
; %a), the other an fmul with %c whose result is subtracted from the fma
; result. The expected assembly is an fmla followed by an fmls, both reading
; lane v3.s[3].
define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
; CHECK-LABEL: optimize_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[3]
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
  %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %1 = fmul <4 x float> %lane2, %c
  %s = fsub <4 x float> %0, %1
  ret <4 x float> %s
}

; Same shape as optimize_dup, except the second splat selects element 1 of %v
; instead of element 3, so the expected fmla reads v3.s[3] while the fmls
; reads v3.s[1].
define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) {
; CHECK-LABEL: no_optimize_dup:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v0.4s, v1.4s, v3.s[3]
; CHECK-NEXT: fmls v0.4s, v2.4s, v3.s[1]
; CHECK-NEXT: ret
entry:
  %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a)
  %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %1 = fmul <4 x float> %lane2, %c
  %s = fsub <4 x float> %0, %1
  ret <4 x float> %s
}

; llvm.fma on a splat of element 1 of %v, with the function-level attribute
; "target-cpu"="cortex-a57" overriding whatever CPU the llc command line
; selected. The expected assembly is a single fmla reading lane v2.s[1].
define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" {
; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

; Identical IR to the _a57 test above, but with the function-level attribute
; "target-cpu"="exynos-m3".
; NOTE(review): the expected assembly here is the same as for the cortex-a57
; variant; presumably the pair exists to show a per-CPU difference, so
; confirm that identical output is intended before relying on this test.
define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" {
; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: fmla v0.2s, v1.2s, v2.s[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}