; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s

; By-element (indexed) NEON multiply tests.
;
; Each function below builds a lane splat with shufflevector (or pulls a single
; lane out with extractelement) and feeds it to a mul / mul+add / mul-sub /
; llvm.fma / widening-multiply intrinsic. The FileCheck directives require that
; exactly one indexed instruction (e.g. "mla v0.4h, v1.4h, v2.h[3]") is emitted
; and that it is immediately followed by "ret".
;
; Naming follows the ACLE intrinsics the IR was derived from:
;   _lane   : the indexed operand is a 64-bit vector
;   _laneq  : the indexed operand is a 128-bit vector
;   _high   : the non-indexed operand is the upper half of a 128-bit vector,
;             which is expected to select the "2" form (mlal2 / mlsl2).

declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)

declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)

declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)

declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)

declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)

declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)

declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)

declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)

declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)

declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)

; --- Integer multiply-accumulate by element: a + (b * v[lane]) -> mla ---

define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmla_lane_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlaq_lane_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmla_lane_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlaq_lane_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmla_laneq_s16:
; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %add = add <4 x i16> %mul, %a
  ret <4 x i16> %add
}

define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s16:
; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %add = add <8 x i16> %mul, %a
  ret <8 x i16> %add
}

define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmla_laneq_s32:
; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %add = add <4 x i32> %mul, %a
  ret <4 x i32> %add
}

; --- Integer multiply-subtract by element: a - (b * v[lane]) -> mls ---

define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsq_lane_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}

define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %b
  %sub = sub <8 x i16> %a, %mul
  ret <8 x i16> %sub
}

define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %b
  %sub = sub <2 x i32> %a, %mul
  ret <2 x i32> %sub
}

define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

; --- Integer multiply by element: a * v[lane] -> mul ---
; The _s and _u variants use identical IR (plain `mul` has no signedness).

define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %a
  ret <4 x i16> %mul
}

define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = mul <4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

; --- Vector fused multiply-add by element: llvm.fma(v[lane], b, a) -> fmla ---

define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; --- Vector fused multiply-subtract by element -> fmls ---
; The negation is written as `fsub -0.0, %v` on the whole vector before the
; lane splat; the checks require it to be folded into a single fmls.

define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfms_lane_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmaq_lane_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsq_lane_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <1 x double> <double -0.000000e+00>, %v
  %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; --- Scalar fused multiply-add/subtract with a lane extracted from a vector ---
; The <1 x double> case has no lane to index, so the plain scalar fmsub is
; what the check expects there.

define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmas_laneq_f32:
; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %extract = extractelement <4 x float> %v, i32 3
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

declare float @llvm.fma.f32(float, float, float)

define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
; CHECK-LABEL: test_vfmsd_lane_f64:
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <1 x double> %v, i32 0
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

declare double @llvm.fma.f64(double, double, double)

define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32:
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <4 x float> %v, i32 3
  %extract = fsub float -0.000000e+00, %extract.rhs
  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
  ret float %0
}

define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsd_laneq_f64:
; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: ret
entry:
  %extract.rhs = extractelement <2 x double> %v, i32 1
  %extract = fsub double -0.000000e+00, %extract.rhs
  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
  ret double %0
}

; --- Signed widening multiply-accumulate by element: a + smull(b, v[lane]) ---
; The checks use unanchored substrings ("mlal", "mlal2"), so they match both
; the signed and unsigned mnemonics (smlal / umlal, ...).

define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; --- Signed widening multiply-subtract by element: a - smull(b, v[lane]) ---

define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32:
; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; --- Unsigned widening multiply-accumulate by element: a + umull(b, v[lane]) ---

define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32:
; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16:
; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32:
; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; --- Unsigned widening multiply-subtract by element: a - umull(b, v[lane]) ---

define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16:
; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32:
; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16:
; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, 
i32 3, i32 3, i32 3> 887 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 888 %sub = sub <4 x i32> %a, %vmull2.i 889 ret <4 x i32> %sub 890} 891 892define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 893; CHECK-LABEL: test_vmlsl_high_lane_u32: 894; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 895; CHECK-NEXT: ret 896entry: 897 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 898 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 899 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 900 %sub = sub <2 x i64> %a, %vmull2.i 901 ret <2 x i64> %sub 902} 903 904define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { 905; CHECK-LABEL: test_vmlsl_high_laneq_u16: 906; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 907; CHECK-NEXT: ret 908entry: 909 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 910 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 911 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 912 %sub = sub <4 x i32> %a, %vmull2.i 913 ret <4 x i32> %sub 914} 915 916define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { 917; CHECK-LABEL: test_vmlsl_high_laneq_u32: 918; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 919; CHECK-NEXT: ret 920entry: 921 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 922 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 923 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 924 %sub = sub <2 x i64> %a, %vmull2.i 925 ret <2 x i64> %sub 926} 927 928define <4 x 
i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 929; CHECK-LABEL: test_vmull_lane_s16: 930; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 931; CHECK-NEXT: ret 932entry: 933 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 934 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 935 ret <4 x i32> %vmull2.i 936} 937 938define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 939; CHECK-LABEL: test_vmull_lane_s32: 940; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 941; CHECK-NEXT: ret 942entry: 943 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 944 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 945 ret <2 x i64> %vmull2.i 946} 947 948define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { 949; CHECK-LABEL: test_vmull_lane_u16: 950; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 951; CHECK-NEXT: ret 952entry: 953 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 954 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 955 ret <4 x i32> %vmull2.i 956} 957 958define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { 959; CHECK-LABEL: test_vmull_lane_u32: 960; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 961; CHECK-NEXT: ret 962entry: 963 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 964 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 965 ret <2 x i64> %vmull2.i 966} 967 968define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 969; CHECK-LABEL: test_vmull_high_lane_s16: 970; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 971; CHECK-NEXT: ret 972entry: 973 %shuffle.i = shufflevector <8 x i16> %a, <8 
x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 974 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 975 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 976 ret <4 x i32> %vmull2.i 977} 978 979define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 980; CHECK-LABEL: test_vmull_high_lane_s32: 981; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 982; CHECK-NEXT: ret 983entry: 984 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 985 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 986 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 987 ret <2 x i64> %vmull2.i 988} 989 990define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { 991; CHECK-LABEL: test_vmull_high_lane_u16: 992; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 993; CHECK-NEXT: ret 994entry: 995 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 996 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 997 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 998 ret <4 x i32> %vmull2.i 999} 1000 1001define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { 1002; CHECK-LABEL: test_vmull_high_lane_u32: 1003; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1004; CHECK-NEXT: ret 1005entry: 1006 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1007 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1008 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1009 ret <2 x i64> %vmull2.i 1010} 1011 1012define <4 x i32> @test_vmull_laneq_s16(<4 x i16> 
%a, <8 x i16> %v) { 1013; CHECK-LABEL: test_vmull_laneq_s16: 1014; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 1015; CHECK-NEXT: ret 1016entry: 1017 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1018 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1019 ret <4 x i32> %vmull2.i 1020} 1021 1022define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1023; CHECK-LABEL: test_vmull_laneq_s32: 1024; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1025; CHECK-NEXT: ret 1026entry: 1027 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1028 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1029 ret <2 x i64> %vmull2.i 1030} 1031 1032define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { 1033; CHECK-LABEL: test_vmull_laneq_u16: 1034; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] 1035; CHECK-NEXT: ret 1036entry: 1037 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1038 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1039 ret <4 x i32> %vmull2.i 1040} 1041 1042define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { 1043; CHECK-LABEL: test_vmull_laneq_u32: 1044; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1045; CHECK-NEXT: ret 1046entry: 1047 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1048 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1049 ret <2 x i64> %vmull2.i 1050} 1051 1052define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1053; CHECK-LABEL: test_vmull_high_laneq_s16: 1054; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1055; CHECK-NEXT: ret 1056entry: 1057 %shuffle.i = shufflevector 
<8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1058 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1059 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1060 ret <4 x i32> %vmull2.i 1061} 1062 1063define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1064; CHECK-LABEL: test_vmull_high_laneq_s32: 1065; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1066; CHECK-NEXT: ret 1067entry: 1068 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1069 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1070 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1071 ret <2 x i64> %vmull2.i 1072} 1073 1074define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { 1075; CHECK-LABEL: test_vmull_high_laneq_u16: 1076; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1077; CHECK-NEXT: ret 1078entry: 1079 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1080 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1081 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1082 ret <4 x i32> %vmull2.i 1083} 1084 1085define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { 1086; CHECK-LABEL: test_vmull_high_laneq_u32: 1087; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1088; CHECK-NEXT: ret 1089entry: 1090 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1091 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1092 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1093 ret <2 x i64> %vmull2.i 1094} 1095 
1096define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1097; CHECK-LABEL: test_vqdmlal_lane_s16: 1098; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1099; CHECK-NEXT: ret 1100entry: 1101 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1102 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1103 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1104 ret <4 x i32> %vqdmlal4.i 1105} 1106 1107define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1108; CHECK-LABEL: test_vqdmlal_lane_s32: 1109; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1110; CHECK-NEXT: ret 1111entry: 1112 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1113 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1114 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1115 ret <2 x i64> %vqdmlal4.i 1116} 1117 1118define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1119; CHECK-LABEL: test_vqdmlal_high_lane_s16: 1120; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1121; CHECK-NEXT: ret 1122entry: 1123 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1124 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1125 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1126 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 1127 ret <4 x i32> %vqdmlal4.i 1128} 1129 1130define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1131; CHECK-LABEL: 
test_vqdmlal_high_lane_s32: 1132; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1133; CHECK-NEXT: ret 1134entry: 1135 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1136 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1137 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1138 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 1139 ret <2 x i64> %vqdmlal4.i 1140} 1141 1142define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 1143; CHECK-LABEL: test_vqdmlsl_lane_s16: 1144; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1145; CHECK-NEXT: ret 1146entry: 1147 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1148 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 1149 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1150 ret <4 x i32> %vqdmlsl4.i 1151} 1152 1153define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 1154; CHECK-LABEL: test_vqdmlsl_lane_s32: 1155; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1156; CHECK-NEXT: ret 1157entry: 1158 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1159 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 1160 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1161 ret <2 x i64> %vqdmlsl4.i 1162} 1163 1164define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 1165; CHECK-LABEL: test_vqdmlsl_high_lane_s16: 1166; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1167; CHECK-NEXT: ret 1168entry: 1169 %shuffle.i = 
shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1170 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1171 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1172 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 1173 ret <4 x i32> %vqdmlsl4.i 1174} 1175 1176define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 1177; CHECK-LABEL: test_vqdmlsl_high_lane_s32: 1178; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1179; CHECK-NEXT: ret 1180entry: 1181 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1182 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1183 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1184 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 1185 ret <2 x i64> %vqdmlsl4.i 1186} 1187 1188define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1189; CHECK-LABEL: test_vqdmull_lane_s16: 1190; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1191; CHECK-NEXT: ret 1192entry: 1193 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1194 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1195 ret <4 x i32> %vqdmull2.i 1196} 1197 1198define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1199; CHECK-LABEL: test_vqdmull_lane_s32: 1200; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1201; CHECK-NEXT: ret 1202entry: 1203 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1204 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> 
%shuffle) 1205 ret <2 x i64> %vqdmull2.i 1206} 1207 1208define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { 1209; CHECK-LABEL: test_vqdmull_laneq_s16: 1210; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1211; CHECK-NEXT: ret 1212entry: 1213 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1214 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 1215 ret <4 x i32> %vqdmull2.i 1216} 1217 1218define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { 1219; CHECK-LABEL: test_vqdmull_laneq_s32: 1220; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1221; CHECK-NEXT: ret 1222entry: 1223 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1224 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 1225 ret <2 x i64> %vqdmull2.i 1226} 1227 1228define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1229; CHECK-LABEL: test_vqdmull_high_lane_s16: 1230; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1231; CHECK-NEXT: ret 1232entry: 1233 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1234 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1235 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1236 ret <4 x i32> %vqdmull2.i 1237} 1238 1239define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1240; CHECK-LABEL: test_vqdmull_high_lane_s32: 1241; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1242; CHECK-NEXT: ret 1243entry: 1244 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1245 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1246 %vqdmull2.i = tail call <2 x 
i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1247 ret <2 x i64> %vqdmull2.i 1248} 1249 1250define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { 1251; CHECK-LABEL: test_vqdmull_high_laneq_s16: 1252; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] 1253; CHECK-NEXT: ret 1254entry: 1255 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1256 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1257 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 1258 ret <4 x i32> %vqdmull2.i 1259} 1260 1261define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { 1262; CHECK-LABEL: test_vqdmull_high_laneq_s32: 1263; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1264; CHECK-NEXT: ret 1265entry: 1266 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1267 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 1268 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 1269 ret <2 x i64> %vqdmull2.i 1270} 1271 1272define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1273; CHECK-LABEL: test_vqdmulh_lane_s16: 1274; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1275; CHECK-NEXT: ret 1276entry: 1277 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1278 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 1279 ret <4 x i16> %vqdmulh2.i 1280} 1281 1282define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1283; CHECK-LABEL: test_vqdmulhq_lane_s16: 1284; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1285; CHECK-NEXT: ret 1286entry: 1287 %shuffle = shufflevector <4 x i16> 
%v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1288 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 1289 ret <8 x i16> %vqdmulh2.i 1290} 1291 1292define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1293; CHECK-LABEL: test_vqdmulh_lane_s32: 1294; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1295; CHECK-NEXT: ret 1296entry: 1297 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1298 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 1299 ret <2 x i32> %vqdmulh2.i 1300} 1301 1302define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1303; CHECK-LABEL: test_vqdmulhq_lane_s32: 1304; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1305; CHECK-NEXT: ret 1306entry: 1307 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1308 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 1309 ret <4 x i32> %vqdmulh2.i 1310} 1311 1312define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { 1313; CHECK-LABEL: test_vqrdmulh_lane_s16: 1314; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] 1315; CHECK-NEXT: ret 1316entry: 1317 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1318 %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 1319 ret <4 x i16> %vqrdmulh2.i 1320} 1321 1322define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { 1323; CHECK-LABEL: test_vqrdmulhq_lane_s16: 1324; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] 1325; CHECK-NEXT: ret 1326entry: 1327 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1328 %vqrdmulh2.i = 
tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 1329 ret <8 x i16> %vqrdmulh2.i 1330} 1331 1332define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { 1333; CHECK-LABEL: test_vqrdmulh_lane_s32: 1334; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1335; CHECK-NEXT: ret 1336entry: 1337 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 1338 %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 1339 ret <2 x i32> %vqrdmulh2.i 1340} 1341 1342define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { 1343; CHECK-LABEL: test_vqrdmulhq_lane_s32: 1344; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1345; CHECK-NEXT: ret 1346entry: 1347 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1348 %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 1349 ret <4 x i32> %vqrdmulh2.i 1350} 1351 1352define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { 1353; CHECK-LABEL: test_vmul_lane_f32: 1354; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1355; CHECK-NEXT: ret 1356entry: 1357 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 1358 %mul = fmul <2 x float> %shuffle, %a 1359 ret <2 x float> %mul 1360} 1361 1362define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { 1363; CHECK-LABEL: test_vmul_lane_f64: 1364; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} 1365; CHECK-NEXT: ret 1366entry: 1367 %0 = bitcast <1 x double> %a to <8 x i8> 1368 %1 = bitcast <8 x i8> %0 to double 1369 %extract = extractelement <1 x double> %v, i32 0 1370 %2 = fmul double %1, %extract 1371 %3 = insertelement <1 x double> undef, double %2, i32 0 1372 ret <1 x double> %3 1373} 1374 1375define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> 
%v) { 1376; CHECK-LABEL: test_vmulq_lane_f32: 1377; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1378; CHECK-NEXT: ret 1379entry: 1380 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1381 %mul = fmul <4 x float> %shuffle, %a 1382 ret <4 x float> %mul 1383} 1384 1385define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { 1386; CHECK-LABEL: test_vmulq_lane_f64: 1387; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1388; CHECK-NEXT: ret 1389entry: 1390 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 1391 %mul = fmul <2 x double> %shuffle, %a 1392 ret <2 x double> %mul 1393} 1394 1395define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { 1396; CHECK-LABEL: test_vmul_laneq_f32: 1397; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1398; CHECK-NEXT: ret 1399entry: 1400 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 1401 %mul = fmul <2 x float> %shuffle, %a 1402 ret <2 x float> %mul 1403} 1404 1405define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { 1406; CHECK-LABEL: test_vmul_laneq_f64: 1407; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] 1408; CHECK-NEXT: ret 1409entry: 1410 %0 = bitcast <1 x double> %a to <8 x i8> 1411 %1 = bitcast <8 x i8> %0 to double 1412 %extract = extractelement <2 x double> %v, i32 1 1413 %2 = fmul double %1, %extract 1414 %3 = insertelement <1 x double> undef, double %2, i32 0 1415 ret <1 x double> %3 1416} 1417 1418define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { 1419; CHECK-LABEL: test_vmulq_laneq_f32: 1420; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1421; CHECK-NEXT: ret 1422entry: 1423 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1424 %mul = fmul <4 x float> %shuffle, %a 1425 ret <4 x float> %mul 1426} 
1427 1428define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { 1429; CHECK-LABEL: test_vmulq_laneq_f64: 1430; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1431; CHECK-NEXT: ret 1432entry: 1433 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 1434 %mul = fmul <2 x double> %shuffle, %a 1435 ret <2 x double> %mul 1436} 1437 1438define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { 1439; CHECK-LABEL: test_vmulx_lane_f32: 1440; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] 1441; CHECK-NEXT: ret 1442entry: 1443 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 1444 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 1445 ret <2 x float> %vmulx2.i 1446} 1447 1448define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { 1449; CHECK-LABEL: test_vmulxq_lane_f32: 1450; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] 1451; CHECK-NEXT: ret 1452entry: 1453 %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1454 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1455 ret <4 x float> %vmulx2.i 1456} 1457 1458define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { 1459; CHECK-LABEL: test_vmulxq_lane_f64: 1460; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] 1461; CHECK-NEXT: ret 1462entry: 1463 %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer 1464 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1465 ret <2 x double> %vmulx2.i 1466} 1467 1468define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { 1469; CHECK-LABEL: test_vmulx_laneq_f32: 1470; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] 1471; 
CHECK-NEXT: ret 1472entry: 1473 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 1474 %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) 1475 ret <2 x float> %vmulx2.i 1476} 1477 1478define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { 1479; CHECK-LABEL: test_vmulxq_laneq_f32: 1480; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] 1481; CHECK-NEXT: ret 1482entry: 1483 %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1484 %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) 1485 ret <4 x float> %vmulx2.i 1486} 1487 1488define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { 1489; CHECK-LABEL: test_vmulxq_laneq_f64: 1490; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] 1491; CHECK-NEXT: ret 1492entry: 1493 %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> 1494 %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) 1495 ret <2 x double> %vmulx2.i 1496} 1497 1498define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { 1499; CHECK-LABEL: test_vmla_lane_s16_0: 1500; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1501; CHECK-NEXT: ret 1502entry: 1503 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 1504 %mul = mul <4 x i16> %shuffle, %b 1505 %add = add <4 x i16> %mul, %a 1506 ret <4 x i16> %add 1507} 1508 1509define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { 1510; CHECK-LABEL: test_vmlaq_lane_s16_0: 1511; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1512; CHECK-NEXT: ret 1513entry: 1514 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 1515 %mul = mul <8 x i16> %shuffle, %b 1516 
%add = add <8 x i16> %mul, %a 1517 ret <8 x i16> %add 1518} 1519 1520define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { 1521; CHECK-LABEL: test_vmla_lane_s32_0: 1522; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1523; CHECK-NEXT: ret 1524entry: 1525 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 1526 %mul = mul <2 x i32> %shuffle, %b 1527 %add = add <2 x i32> %mul, %a 1528 ret <2 x i32> %add 1529} 1530 1531define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { 1532; CHECK-LABEL: test_vmlaq_lane_s32_0: 1533; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 1534; CHECK-NEXT: ret 1535entry: 1536 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 1537 %mul = mul <4 x i32> %shuffle, %b 1538 %add = add <4 x i32> %mul, %a 1539 ret <4 x i32> %add 1540} 1541 1542define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { 1543; CHECK-LABEL: test_vmla_laneq_s16_0: 1544; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 1545; CHECK-NEXT: ret 1546entry: 1547 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 1548 %mul = mul <4 x i16> %shuffle, %b 1549 %add = add <4 x i16> %mul, %a 1550 ret <4 x i16> %add 1551} 1552 1553define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { 1554; CHECK-LABEL: test_vmlaq_laneq_s16_0: 1555; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 1556; CHECK-NEXT: ret 1557entry: 1558 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer 1559 %mul = mul <8 x i16> %shuffle, %b 1560 %add = add <8 x i16> %mul, %a 1561 ret <8 x i16> %add 1562} 1563 1564define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { 1565; CHECK-LABEL: test_vmla_laneq_s32_0: 1566; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 1567; CHECK-NEXT: ret 1568entry: 1569 
%shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %b
  %add = add <2 x i32> %mul, %a
  ret <2 x i32> %add
}

; vmlaq_laneq_s32, lane 0: all operands are 4 x i32; expected selection is
; MLA .4s indexed by .s[0].
define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlaq_laneq_s32_0:
; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %b
  %acc = add <4 x i32> %prod, %a
  ret <4 x i32> %acc
}

; vmls_lane_s16, lane 0: %a minus (%b times a splat of element 0 of %v).
; Expected selection is the by-element MLS form indexing .h[0].
define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmls_lane_s16_0:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %b
  %acc = sub <4 x i16> %a, %prod
  ret <4 x i16> %acc
}

; vmlsq_lane_s16, lane 0: the 64-bit %v is splatted to eight lanes; expected
; selection is MLS .8h indexed by .h[0].
define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsq_lane_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = sub <8 x i16> %a, %prod
  ret <8 x i16> %acc
}

; vmls_lane_s32, lane 0: 2 x i32 operands; expected selection is MLS .2s
; indexed by .s[0].
define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmls_lane_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %b
  %acc = sub <2 x i32> %a, %prod
  ret <2 x i32> %acc
}

define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL:
test_vmlsq_lane_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4 x i32> %sub
}

; vmls_laneq_s16, lane 0: element 0 of the 128-bit %v is splatted into a
; 4 x i16 vector; expected selection is MLS .4h indexed by .h[0].
define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmls_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %b
  %acc = sub <4 x i16> %a, %prod
  ret <4 x i16> %acc
}

; vmlsq_laneq_s16, lane 0: all operands are 8 x i16; expected selection is
; MLS .8h indexed by .h[0].
define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s16_0:
; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %b
  %acc = sub <8 x i16> %a, %prod
  ret <8 x i16> %acc
}

; vmls_laneq_s32, lane 0: element 0 of the 128-bit %v is splatted into a
; 2 x i32 vector; expected selection is MLS .2s indexed by .s[0].
define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmls_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %b
  %acc = sub <2 x i32> %a, %prod
  ret <2 x i32> %acc
}

define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsq_laneq_s32_0:
; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul <4 x i32> %shuffle, %b
  %sub = sub <4 x i32> %a, %mul
  ret <4
x i32> %sub
}

; vmul_lane_s16, lane 0: %a times a splat of element 0 of %v.
; Expected selection is the by-element MUL form indexing .h[0].
define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; vmulq_lane_s16, lane 0: the 64-bit %v is splatted to eight lanes; expected
; selection is MUL .8h indexed by .h[0].
define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmulq_lane_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

; vmul_lane_s32, lane 0: 2 x i32 operands; expected selection is MUL .2s
; indexed by .s[0].
define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

; vmulq_lane_s32, lane 0: the 64-bit %v is splatted to four lanes; expected
; selection is MUL .4s indexed by .s[0].
define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; vmul_lane_u16, lane 0: the IR is the same shape as the s16 variant above
; (integer mul carries no signedness), with the same expected MUL .4h form.
define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmul_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL:
test_vmulq_lane_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %mul = mul <8 x i16> %shuffle, %a
  ret <8 x i16> %mul
}

; vmul_lane_u32, lane 0: 2 x i32 operands; expected selection is MUL .2s
; indexed by .s[0].
define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmul_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

; vmulq_lane_u32, lane 0: the 64-bit %v is splatted to four lanes; expected
; selection is MUL .4s indexed by .s[0].
define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmulq_lane_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; vmul_laneq_s16, lane 0: element 0 of the 128-bit %v is splatted into a
; 4 x i16 vector; expected selection is MUL .4h indexed by .h[0].
define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; vmulq_laneq_s16, lane 0: both operands are 8 x i16; expected selection is
; MUL .8h indexed by .h[0].
define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_s16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %mul = mul <2 x i32> %shuffle, %a
  ret <2 x i32> %mul
}

; vmulq_laneq_s32, lane 0: both operands are 4 x i32; expected selection is
; MUL .4s indexed by .s[0].
define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_s32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i32> %splat, %a
  ret <4 x i32> %prod
}

; vmul_laneq_u16, lane 0: element 0 of the 128-bit %v is splatted into a
; 4 x i16 vector; expected selection is MUL .4h indexed by .h[0].
define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmul_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %prod = mul <4 x i16> %splat, %a
  ret <4 x i16> %prod
}

; vmulq_laneq_u16, lane 0: both operands are 8 x i16; expected selection is
; MUL .8h indexed by .h[0].
define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
; CHECK-LABEL: test_vmulq_laneq_u16_0:
; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
  %prod = mul <8 x i16> %splat, %a
  ret <8 x i16> %prod
}

; vmul_laneq_u32, lane 0: element 0 of the 128-bit %v is splatted into a
; 2 x i32 vector; expected selection is MUL .2s indexed by .s[0].
define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmul_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %prod = mul <2 x i32> %splat, %a
  ret <2 x i32> %prod
}

define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
; CHECK-LABEL: test_vmulq_laneq_u32_0:
; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  %mul = mul
<4 x i32> %shuffle, %a
  ret <4 x i32> %mul
}

; vfma_lane_f32, lane 0: llvm.fma of (splat of element 0 of %v), %b and %a.
; Expected selection is the by-element FMLA form indexing .s[0].
define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfma_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

; vfmaq_lane_f32, lane 0: the 64-bit %v is splatted to four lanes; expected
; selection is FMLA .4s indexed by .s[0].
define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmaq_lane_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

; vfma_laneq_f32, lane 0: element 0 of the 128-bit %v is splatted into a
; 2 x float vector; expected selection is FMLA .2s indexed by .s[0].
define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfma_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

; vfmaq_laneq_f32, lane 0: all operands are 4 x float; expected selection is
; FMLA .4s indexed by .s[0].
define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f32_0:
; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %splat = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
;
CHECK-LABEL: test_vfms_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %0
}

; vfmsq_lane_f32, lane 0: %v is negated (fsub from -0.0), element 0 of the
; result is splatted to four lanes and fed to llvm.fma; expected selection is
; FMLS .4s indexed by .s[0].
define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
; CHECK-LABEL: test_vfmsq_lane_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <2 x float> %neg, <2 x float> undef, <4 x i32> zeroinitializer
  %res = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %splat, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %res
}

; vfms_laneq_f32, lane 0: the 128-bit %v is negated, then element 0 is
; splatted into a 2 x float vector; expected selection is FMLS .2s indexed
; by .s[0].
define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfms_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %splat = shufflevector <4 x float> %neg, <4 x float> undef, <2 x i32> zeroinitializer
  %res = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %splat, <2 x float> %b, <2 x float> %a)
  ret <2 x float> %res
}

define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f32_0:
; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
  %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32>
zeroinitializer
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
  ret <4 x float> %0
}

; vfmaq_laneq_f64, lane 0: llvm.fma of (splat of element 0 of %v), %b and %a.
; Expected selection is FMLA .2d indexed by .d[0].
define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmaq_laneq_f64_0:
; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; vfmsq_laneq_f64, lane 0: %v is negated (fsub from -0.0) before the splat and
; llvm.fma; expected selection is FMLS .2d indexed by .d[0].
define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
; CHECK-LABEL: test_vfmsq_laneq_f64_0:
; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
  %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
  ret <2 x double> %0
}

; vmlal_lane_s16, lane 0: signed widening multiply (smull intrinsic) of %b by a
; splat of element 0 of %v, accumulated into %a.
; The expected mnemonic is spelled with its signedness prefix: a bare "mlal"
; pattern is a substring match and would also accept the unsigned instruction.
define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The signed widening multiply-accumulate tests below spell the mnemonic with
; its "s" prefix. A bare "mlal"/"mlal2" pattern is a substring match and would
; also accept the unsigned instruction.

; vmlal_laneq_s16, lane 0: smull of %b by a splat of element 0 of the 128-bit
; %v, accumulated into %a.
define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_s16_0:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; vmlal_laneq_s32, lane 0: same shape with 2 x i32 sources widening to 2 x i64.
define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_s32_0:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; vmlal_high_lane_s16, lane 0: %shuffle.i extracts elements 4..7 (the upper
; half) of %b, so the "2" (high-half) instruction form is expected.
define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
;
CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The signed tests below spell the expected mnemonic with its "s" prefix. A
; bare "mlal2"/"mlsl" pattern is a substring match and would also accept the
; unsigned instruction.

; vmlal_high_laneq_s16, lane 0: smull of the upper half of %b (elements 4..7)
; by a splat of element 0 of %v, accumulated into %a.
define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s16_0:
; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; vmlal_high_laneq_s32, lane 0: same shape using the upper half of %b
; (elements 2..3), widening 2 x i32 to 2 x i64.
define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_s32_0:
; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x
i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; The signed widening multiply-subtract tests below spell the mnemonic with its
; "s" prefix. A bare "mlsl"/"mlsl2" pattern is a substring match and would also
; accept the unsigned instruction.

; vmlsl_lane_s32, lane 0: %a minus smull of %b by a splat of element 0 of %v.
define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; vmlsl_laneq_s16, lane 0: the lane comes from a 128-bit %v; 4 x i16 sources
; widen to 4 x i32.
define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s16_0:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; vmlsl_laneq_s32, lane 0: the lane comes from a 128-bit %v; 2 x i32 sources
; widen to 2 x i64.
define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_s32_0:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x
i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; The signed high-half multiply-subtract tests below spell the mnemonic with
; its "s" prefix. A bare "mlsl2" pattern is a substring match and would also
; accept the unsigned instruction.

; vmlsl_high_lane_s32, lane 0: %a minus smull of the upper half of %b
; (elements 2..3) by a splat of element 0 of %v.
define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; vmlsl_high_laneq_s16, lane 0: upper half of %b (elements 4..7) times a splat
; of element 0 of the 128-bit %v, subtracted from %a.
define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s16_0:
; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_s32_0:
; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
ret <2 x i64> %sub
}

; The unsigned widening multiply-accumulate tests below spell the mnemonic with
; its "u" prefix. A bare "mlal" pattern is a substring match and would also
; accept the signed instruction.

; vmlal_lane_u16, lane 0: umull of %b by a splat of element 0 of %v,
; accumulated into %a.
define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; vmlal_lane_u32, lane 0: same shape with 2 x i32 sources widening to 2 x i64.
define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_lane_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; vmlal_laneq_u16, lane 0: the lane comes from a 128-bit %v.
define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_laneq_u16_0:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; vmlal_laneq_u32, lane 0: the lane comes from a 128-bit %v; 2 x i32 sources
; widen to 2 x i64.
define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_laneq_u32_0:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The unsigned high-half multiply-accumulate tests below spell the mnemonic
; with its "u" prefix. A bare "mlal2" pattern is a substring match and would
; also accept the signed instruction.

; vmlal_high_lane_u16, lane 0: umull of the upper half of %b (elements 4..7)
; by a splat of element 0 of %v, accumulated into %a.
define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

; vmlal_high_lane_u32, lane 0: upper half of %b is elements 2..3; 2 x i32
; sources widen to 2 x i64.
define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_lane_u32_0:
; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; vmlal_high_laneq_u16, lane 0: the lane comes from a 128-bit %v.
define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u16_0:
; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %add = add <4 x i32> %vmull2.i, %a
  ret <4 x i32> %add
}

define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlal_high_laneq_u32_0:
; CHECK:
umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %add = add <2 x i64> %vmull2.i, %a
  ret <2 x i64> %add
}

; The unsigned widening multiply-subtract tests below spell the mnemonic with
; its "u" prefix. A bare "mlsl" pattern is a substring match and would also
; accept the signed instruction.

; vmlsl_lane_u16, lane 0: %a minus umull of %b by a splat of element 0 of %v.
define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_lane_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; vmlsl_lane_u32, lane 0: same shape with 2 x i32 sources widening to 2 x i64.
define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_lane_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; vmlsl_laneq_u16, lane 0: the lane comes from a 128-bit %v.
define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u16_0:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x
i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_laneq_u32_0:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; The unsigned multiply-subtract tests in this group spell the mnemonic with
; its "u" prefix. A bare "mlsl"/"mlsl2" pattern is a substring match and would
; also accept the signed instruction.

; vmlsl_high_lane_u16, lane 0: %a minus umull of the upper half of %b
; (elements 4..7) by a splat of element 0 of %v.
define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; vmlsl_high_lane_u32, lane 0: upper half of %b is elements 2..3; 2 x i32
; sources widen to 2 x i64.
define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u16_0:
; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle)
  %sub = sub <4 x i32> %a, %vmull2.i
  ret <4 x i32> %sub
}

; The tests below spell the expected mnemonic with the signedness prefix that
; matches the intrinsic they call. A bare "mlsl2"/"mull" pattern is a substring
; match and would accept either the signed or the unsigned instruction.

; vmlsl_high_laneq_u32, lane 0: %a minus umull of the upper half of %b
; (elements 2..3) by a splat of element 0 of %v.
define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
; CHECK-LABEL: test_vmlsl_high_laneq_u32_0:
; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle)
  %sub = sub <2 x i64> %a, %vmull2.i
  ret <2 x i64> %sub
}

; vmull_lane_s16, lane 0: smull of %a by a splat of element 0 of %v, returned
; directly (no accumulate).
define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_s16_0:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i32> %vmull2.i
}

; vmull_lane_s32, lane 0: same shape with 2 x i32 sources widening to 2 x i64.
define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vmull_lane_s32_0:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i64> %vmull2.i
}

define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vmull_lane_u16_0:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle =
shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2333 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2334 ret <4 x i32> %vmull2.i 2335} 2336 2337define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { 2338; CHECK-LABEL: test_vmull_lane_u32_0: 2339; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2340; CHECK-NEXT: ret 2341entry: 2342 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2343 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2344 ret <2 x i64> %vmull2.i 2345} 2346 2347define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2348; CHECK-LABEL: test_vmull_high_lane_s16_0: 2349; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2350; CHECK-NEXT: ret 2351entry: 2352 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2353 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2354 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2355 ret <4 x i32> %vmull2.i 2356} 2357 2358define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2359; CHECK-LABEL: test_vmull_high_lane_s32_0: 2360; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2361; CHECK-NEXT: ret 2362entry: 2363 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2364 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2365 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2366 ret <2 x i64> %vmull2.i 2367} 2368 2369define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { 2370; CHECK-LABEL: test_vmull_high_lane_u16_0: 2371; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2372; CHECK-NEXT: ret 
2373entry: 2374 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2375 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2376 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2377 ret <4 x i32> %vmull2.i 2378} 2379 2380define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { 2381; CHECK-LABEL: test_vmull_high_lane_u32_0: 2382; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2383; CHECK-NEXT: ret 2384entry: 2385 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2386 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2387 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2388 ret <2 x i64> %vmull2.i 2389} 2390 2391define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 2392; CHECK-LABEL: test_vmull_laneq_s16_0: 2393; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2394; CHECK-NEXT: ret 2395entry: 2396 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2397 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2398 ret <4 x i32> %vmull2.i 2399} 2400 2401define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 2402; CHECK-LABEL: test_vmull_laneq_s32_0: 2403; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2404; CHECK-NEXT: ret 2405entry: 2406 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2407 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2408 ret <2 x i64> %vmull2.i 2409} 2410 2411define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { 2412; CHECK-LABEL: test_vmull_laneq_u16_0: 2413; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2414; CHECK-NEXT: 
ret 2415entry: 2416 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2417 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2418 ret <4 x i32> %vmull2.i 2419} 2420 2421define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { 2422; CHECK-LABEL: test_vmull_laneq_u32_0: 2423; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2424; CHECK-NEXT: ret 2425entry: 2426 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2427 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2428 ret <2 x i64> %vmull2.i 2429} 2430 2431define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 2432; CHECK-LABEL: test_vmull_high_laneq_s16_0: 2433; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2434; CHECK-NEXT: ret 2435entry: 2436 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2437 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2438 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2439 ret <4 x i32> %vmull2.i 2440} 2441 2442define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 2443; CHECK-LABEL: test_vmull_high_laneq_s32_0: 2444; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2445; CHECK-NEXT: ret 2446entry: 2447 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2448 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2449 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2450 ret <2 x i64> %vmull2.i 2451} 2452 2453define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { 2454; CHECK-LABEL: test_vmull_high_laneq_u16_0: 2455; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.h[0] 2456; CHECK-NEXT: ret 2457entry: 2458 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2459 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2460 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2461 ret <4 x i32> %vmull2.i 2462} 2463 2464define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { 2465; CHECK-LABEL: test_vmull_high_laneq_u32_0: 2466; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2467; CHECK-NEXT: ret 2468entry: 2469 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2470 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2471 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2472 ret <2 x i64> %vmull2.i 2473} 2474 2475define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2476; CHECK-LABEL: test_vqdmlal_lane_s16_0: 2477; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2478; CHECK-NEXT: ret 2479entry: 2480 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2481 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2482 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 2483 ret <4 x i32> %vqdmlal4.i 2484} 2485 2486define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2487; CHECK-LABEL: test_vqdmlal_lane_s32_0: 2488; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2489; CHECK-NEXT: ret 2490entry: 2491 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2492 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2493 %vqdmlal4.i = tail call <2 x i64> 
@llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 2494 ret <2 x i64> %vqdmlal4.i 2495} 2496 2497define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2498; CHECK-LABEL: test_vqdmlal_high_lane_s16_0: 2499; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2500; CHECK-NEXT: ret 2501entry: 2502 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2503 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2504 %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2505 %vqdmlal4.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i) 2506 ret <4 x i32> %vqdmlal4.i 2507} 2508 2509define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2510; CHECK-LABEL: test_vqdmlal_high_lane_s32_0: 2511; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2512; CHECK-NEXT: ret 2513entry: 2514 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2515 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2516 %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2517 %vqdmlal4.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 2518 ret <2 x i64> %vqdmlal4.i 2519} 2520 2521define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { 2522; CHECK-LABEL: test_vqdmlsl_lane_s16_0: 2523; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2524; CHECK-NEXT: ret 2525entry: 2526 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2527 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) 2528 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x 
i32> %a, <4 x i32> %vqdmlsl2.i) 2529 ret <4 x i32> %vqdmlsl4.i 2530} 2531 2532define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { 2533; CHECK-LABEL: test_vqdmlsl_lane_s32_0: 2534; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2535; CHECK-NEXT: ret 2536entry: 2537 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2538 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) 2539 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 2540 ret <2 x i64> %vqdmlsl4.i 2541} 2542 2543define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { 2544; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0: 2545; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2546; CHECK-NEXT: ret 2547entry: 2548 %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2549 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2550 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2551 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 2552 ret <4 x i32> %vqdmlsl4.i 2553} 2554 2555define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { 2556; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0: 2557; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2558; CHECK-NEXT: ret 2559entry: 2560 %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2561 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2562 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2563 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 2564 
ret <2 x i64> %vqdmlsl4.i 2565} 2566 2567define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2568; CHECK-LABEL: test_vqdmull_lane_s16_0: 2569; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2570; CHECK-NEXT: ret 2571entry: 2572 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2573 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2574 ret <4 x i32> %vqdmull2.i 2575} 2576 2577define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2578; CHECK-LABEL: test_vqdmull_lane_s32_0: 2579; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2580; CHECK-NEXT: ret 2581entry: 2582 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2583 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2584 ret <2 x i64> %vqdmull2.i 2585} 2586 2587define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { 2588; CHECK-LABEL: test_vqdmull_laneq_s16_0: 2589; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2590; CHECK-NEXT: ret 2591entry: 2592 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2593 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) 2594 ret <4 x i32> %vqdmull2.i 2595} 2596 2597define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { 2598; CHECK-LABEL: test_vqdmull_laneq_s32_0: 2599; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2600; CHECK-NEXT: ret 2601entry: 2602 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer 2603 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) 2604 ret <2 x i64> %vqdmull2.i 2605} 2606 2607define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2608; CHECK-LABEL: test_vqdmull_high_lane_s16_0: 
2609; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2610; CHECK-NEXT: ret 2611entry: 2612 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2613 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2614 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2615 ret <4 x i32> %vqdmull2.i 2616} 2617 2618define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2619; CHECK-LABEL: test_vqdmull_high_lane_s32_0: 2620; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2621; CHECK-NEXT: ret 2622entry: 2623 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2624 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2625 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2626 ret <2 x i64> %vqdmull2.i 2627} 2628 2629define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { 2630; CHECK-LABEL: test_vqdmull_high_laneq_s16_0: 2631; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2632; CHECK-NEXT: ret 2633entry: 2634 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2635 %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer 2636 %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i, <4 x i16> %shuffle) 2637 ret <4 x i32> %vqdmull2.i 2638} 2639 2640define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { 2641; CHECK-LABEL: test_vqdmull_high_laneq_s32_0: 2642; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2643; CHECK-NEXT: ret 2644entry: 2645 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2646 %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> 
zeroinitializer 2647 %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i, <2 x i32> %shuffle) 2648 ret <2 x i64> %vqdmull2.i 2649} 2650 2651define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { 2652; CHECK-LABEL: test_vqdmulh_lane_s16_0: 2653; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] 2654; CHECK-NEXT: ret 2655entry: 2656 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer 2657 %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) 2658 ret <4 x i16> %vqdmulh2.i 2659} 2660 2661define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { 2662; CHECK-LABEL: test_vqdmulhq_lane_s16_0: 2663; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] 2664; CHECK-NEXT: ret 2665entry: 2666 %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer 2667 %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) 2668 ret <8 x i16> %vqdmulh2.i 2669} 2670 2671define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { 2672; CHECK-LABEL: test_vqdmulh_lane_s32_0: 2673; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] 2674; CHECK-NEXT: ret 2675entry: 2676 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer 2677 %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) 2678 ret <2 x i32> %vqdmulh2.i 2679} 2680 2681define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { 2682; CHECK-LABEL: test_vqdmulhq_lane_s32_0: 2683; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] 2684; CHECK-NEXT: ret 2685entry: 2686 %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer 2687 %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) 2688 ret <4 x i32> %vqdmulh2.i 2689} 
; vqrdmulh, lane 0: the sqrdmulh intrinsic applied to %a and a splat of lane 0
; of the 64-bit %v. The pattern spells out the full sqrdmulh mnemonic rather
; than matching only its tail.

define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
  ret <4 x i16> %vqrdmulh2.i
}

define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
; CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
  ret <8 x i16> %vqrdmulh2.i
}

define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
  ret <2 x i32> %vqrdmulh2.i
}

define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
; CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
  %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
  ret <4 x i32> %vqrdmulh2.i
}

; vmul (floating point), lane 0: a plain IR fmul of %a with a splat of lane 0
; of %v is expected to be emitted as the by-element form of fmul.

define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmul_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulq_lane_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmul_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x float> %shuffle, %a
  ret <2 x float> %mul
}

; Scalar case: %a is a <1 x double> reinterpreted as a double through two
; bitcasts, multiplied by element 0 extracted from %v; the expected output is
; the scalar-by-element form with d-register operands.
define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmul_laneq_f64_0:
; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %0 = bitcast <1 x double> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to double
  %extract = extractelement <2 x double> %v, i32 0
  %2 = fmul double %1, %extract
  %3 = insertelement <1 x double> undef, double %2, i32 0
  ret <1 x double> %3
}

define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulq_laneq_f32_0:
; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x float> %shuffle, %a
  ret <4 x float> %mul
}

define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulq_laneq_f64_0:
; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %mul = fmul <2 x double> %shuffle, %a
  ret <2 x double> %mul
}

; vmulx, lane 0: the fmulx intrinsic applied to %a and a splat of lane 0 of
; %v. The pattern names fmulx in full, matching how the fmul tests above
; spell their mnemonic.

define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulx_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
; CHECK-LABEL: test_vmulxq_lane_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
; CHECK-LABEL: test_vmulxq_lane_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}

define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulx_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
  ret <2 x float> %vmulx2.i
}

define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f32_0:
; CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
  ret <4 x float> %vmulx2.i
}

define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
; CHECK-LABEL: test_vmulxq_laneq_f64_0:
; CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
; CHECK-NEXT: ret
entry:
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
  ret <2 x double> %vmulx2.i
}