; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; Codegen test for NEON "three operands, different widths" patterns on
; AArch64. Each function spells a widening add/sub in generic IR
; (sext/zext + add/sub, optionally preceded by a high-half shufflevector)
; and the autogenerated assertions pin the single instruction llc is
; expected to select for it.
;
; Naming used by the functions below:
;   _s8/_s16/_s32  operands are sign-extended    -> s* instruction forms
;   _u8/_u16/_u32  operands are zero-extended    -> u* instruction forms
;   _a8/_a16/_a32  zero-extended, result then masked with an 'and' of the
;                  low half of each lane; the mask must survive as a
;                  separate instruction after the widening op
;   _high          the narrow operand is the upper half of a 128-bit
;                  vector                        -> "2" instruction forms

; Intrinsic declarations. None of the functions visible in this part of the
; file call them; presumably they are used by tests further down.
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
declare <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64>, <2 x i64>)
declare <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32>, <4 x i32>)
declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>)
declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>)
declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>)
declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>)

; ---------------------------------------------------------------------------
; vaddl: both 64-bit operands are extended, then added.
; Expected selection: saddl (sext) / uaddl (zext).
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %add.i
}

; Masked variants: the sum of two zero-extended lanes can carry into the
; upper half of the wide lane, so the 'and' is not redundant and is expected
; to remain (bic for 16-bit lanes, movi + and for wider lanes).

define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddl_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %vmovl.i2.i
  %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddl_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %vmovl.i2.i
  %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddl_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %vmovl.i2.i
  %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vaddl_high: same as vaddl, but each operand is the upper half of a 128-bit
; vector (extracted with shufflevector). The extract is expected to fold
; into saddl2 / uaddl2.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %add.i = add <8 x i16> %0, %1
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %add.i = add <4 x i32> %0, %1
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %add.i = add <2 x i64> %0, %1
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %add.i = add <8 x i16> %0, %1
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %add.i = add <4 x i32> %0, %1
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %add.i = add <2 x i64> %0, %1
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddl_high_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %add.i = add <8 x i16> %0, %1
  %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddl_high_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %add.i = add <4 x i32> %0, %1
  %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddl_high_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %add.i = add <2 x i64> %0, %1
  %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vaddw: %a is already wide; only %b is extended before the add. The IR
; places the extended value as the first add operand, while the expected
; instruction takes the wide value first (saddw / uaddw vD, vWide, vNarrow).
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddw v0.8h, v0.8h, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddw v0.2d, v0.2d, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %a
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw v0.8h, v0.8h, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw v0.2d, v0.2d, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %a
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vaddw_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw v0.8h, v0.8h, v1.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
  %add.i = add <8 x i16> %vmovl.i.i, %a
  %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vaddw_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw v0.4s, v0.4s, v1.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
  %add.i = add <4 x i32> %vmovl.i.i, %a
  %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vaddw_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw v0.2d, v0.2d, v1.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
  %add.i = add <2 x i64> %vmovl.i.i, %a
  %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vaddw_high: wide %a plus the extended upper half of 128-bit %b.
; Expected selection: saddw2 / uaddw2.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vaddw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %0, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %0, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %0, %a
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %0, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @test_vaddw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %0, %a
  ret <4 x i32> %add.i
}

define <2 x i64> @test_vaddw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %0, %a
  ret <2 x i64> %add.i
}

define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddw_high_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %add.i = add <8 x i16> %0, %a
  %and = and <8 x i16> %add.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddw_high_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %add.i = add <4 x i32> %0, %a
  %and = and <4 x i32> %add.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddw_high_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uaddw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %add.i = add <2 x i64> %0, %a
  %and = and <2 x i64> %add.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vsubl: both 64-bit operands are extended, then subtracted (%a - %b).
; Expected selection: ssubl / usubl.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = sext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = sext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = sext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
  ret <2 x i64> %sub.i
}

; Masked variants: a difference of zero-extended lanes can wrap and set the
; upper half of the wide lane, so the 'and' is expected to remain.

define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubl_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %a to <8 x i16>
  %vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i2.i
  %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubl_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %a to <4 x i32>
  %vmovl.i2.i = zext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %vmovl.i.i, %vmovl.i2.i
  %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubl_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %a to <2 x i64>
  %vmovl.i2.i = zext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %vmovl.i.i, %vmovl.i2.i
  %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vsubl_high: as vsubl, on the upper halves of two 128-bit vectors.
; Expected selection: ssubl2 / usubl2.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vsubl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = sext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %sub.i = sub <8 x i16> %0, %1
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = sext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %sub.i = sub <4 x i32> %0, %1
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = sext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %sub.i = sub <2 x i64> %0, %1
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %sub.i = sub <8 x i16> %0, %1
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %sub.i = sub <4 x i32> %0, %1
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %sub.i = sub <2 x i64> %0, %1
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubl_high_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %shuffle.i.i2.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = zext <8 x i8> %shuffle.i.i2.i to <8 x i16>
  %sub.i = sub <8 x i16> %0, %1
  %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubl_high_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %shuffle.i.i2.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %1 = zext <4 x i16> %shuffle.i.i2.i to <4 x i32>
  %sub.i = sub <4 x i32> %0, %1
  %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubl_high_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %shuffle.i.i2.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %1 = zext <2 x i32> %shuffle.i.i2.i to <2 x i64>
  %sub.i = sub <2 x i64> %0, %1
  %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vsubw: wide %a minus the extended 64-bit %b.
; Expected selection: ssubw / usubw.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubw v0.8h, v0.8h, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %a, %vmovl.i.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubw v0.4s, v0.4s, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %a, %vmovl.i.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubw v0.2d, v0.2d, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = sext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %a, %vmovl.i.i
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw v0.8h, v0.8h, v1.8b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %a, %vmovl.i.i
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw v0.4s, v0.4s, v1.4h
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %a, %vmovl.i.i
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw v0.2d, v0.2d, v1.2s
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %a, %vmovl.i.i
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsubw_a8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw v0.8h, v0.8h, v1.8b
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <8 x i8> %b to <8 x i16>
  %sub.i = sub <8 x i16> %a, %vmovl.i.i
  %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsubw_a16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw v0.4s, v0.4s, v1.4h
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <4 x i16> %b to <4 x i32>
  %sub.i = sub <4 x i32> %a, %vmovl.i.i
  %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsubw_a32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw v0.2d, v0.2d, v1.2s
; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vmovl.i.i = zext <2 x i32> %b to <2 x i64>
  %sub.i = sub <2 x i64> %a, %vmovl.i.i
  %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; vsubw_high: wide %a minus the extended upper half of 128-bit %b.
; Expected selection: ssubw2 / usubw2.
; ---------------------------------------------------------------------------

define <8 x i16> @test_vsubw_high_s8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_s8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = sext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %sub.i = sub <8 x i16> %a, %0
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_high_s16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = sext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %sub.i = sub <4 x i32> %a, %0
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_high_s32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ssubw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = sext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %sub.i = sub <2 x i64> %a, %0
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubw_high_u8(<8 x i16> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsubw_high_u8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw2 v0.8h, v0.8h, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
  %sub.i = sub <8 x i16> %a, %0
  ret <8 x i16> %sub.i
}

define <4 x i32> @test_vsubw_high_u16(<4 x i32> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsubw_high_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw2 v0.4s, v0.4s, v1.8h
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32>
  %sub.i = sub <4 x i32> %a, %0
  ret <4 x i32> %sub.i
}

define <2 x i64> @test_vsubw_high_u32(<2 x i64> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsubw_high_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    usubw2 v0.2d, v0.2d, v1.4s
; CHECK-NEXT:    ret
entry:
  %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64>
  %sub.i = sub <2 x i64> %a, %0
  ret <2 x i64> %sub.i
}

define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a,
<16 x i8> %b) { 934; CHECK-LABEL: test_vsubw_high_a8: 935; CHECK: // %bb.0: // %entry 936; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b 937; CHECK-NEXT: bic v0.8h, #255, lsl #8 938; CHECK-NEXT: ret 939entry: 940 %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 941 %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> 942 %sub.i = sub <8 x i16> %a, %0 943 %and = and <8 x i16> %sub.i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> 944 ret <8 x i16> %and 945} 946 947define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) { 948; CHECK-LABEL: test_vsubw_high_a16: 949; CHECK: // %bb.0: // %entry 950; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h 951; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff 952; CHECK-NEXT: and v0.16b, v0.16b, v1.16b 953; CHECK-NEXT: ret 954entry: 955 %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 956 %0 = zext <4 x i16> %shuffle.i.i.i to <4 x i32> 957 %sub.i = sub <4 x i32> %a, %0 958 %and = and <4 x i32> %sub.i, <i32 65535, i32 65535, i32 65535, i32 65535> 959 ret <4 x i32> %and 960} 961 962define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) { 963; CHECK-LABEL: test_vsubw_high_a32: 964; CHECK: // %bb.0: // %entry 965; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s 966; CHECK-NEXT: movi v1.2d, #0x000000ffffffff 967; CHECK-NEXT: and v0.16b, v0.16b, v1.16b 968; CHECK-NEXT: ret 969entry: 970 %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 971 %0 = zext <2 x i32> %shuffle.i.i.i to <2 x i64> 972 %sub.i = sub <2 x i64> %a, %0 973 %and = and <2 x i64> %sub.i, <i64 4294967295, i64 4294967295> 974 ret <2 x i64> %and 975} 976 977define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { 978; CHECK-LABEL: test_vaddhn_s16: 979; CHECK: // %bb.0: // %entry 980; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h 981; CHECK-NEXT: ret 982entry: 983 %vaddhn.i = add <8 x i16> %a, 
%b 984 %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 985 %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> 986 ret <8 x i8> %vaddhn2.i 987} 988 989define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { 990; CHECK-LABEL: test_vaddhn_s32: 991; CHECK: // %bb.0: // %entry 992; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s 993; CHECK-NEXT: ret 994entry: 995 %vaddhn.i = add <4 x i32> %a, %b 996 %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> 997 %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> 998 ret <4 x i16> %vaddhn2.i 999} 1000 1001define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { 1002; CHECK-LABEL: test_vaddhn_s64: 1003; CHECK: // %bb.0: // %entry 1004; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d 1005; CHECK-NEXT: ret 1006entry: 1007 %vaddhn.i = add <2 x i64> %a, %b 1008 %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> 1009 %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> 1010 ret <2 x i32> %vaddhn2.i 1011} 1012 1013define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { 1014; CHECK-LABEL: test_vaddhn_u16: 1015; CHECK: // %bb.0: // %entry 1016; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h 1017; CHECK-NEXT: ret 1018entry: 1019 %vaddhn.i = add <8 x i16> %a, %b 1020 %vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1021 %vaddhn2.i = trunc <8 x i16> %vaddhn1.i to <8 x i8> 1022 ret <8 x i8> %vaddhn2.i 1023} 1024 1025define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { 1026; CHECK-LABEL: test_vaddhn_u32: 1027; CHECK: // %bb.0: // %entry 1028; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s 1029; CHECK-NEXT: ret 1030entry: 1031 %vaddhn.i = add <4 x i32> %a, %b 1032 %vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16> 1033 %vaddhn2.i = trunc <4 x i32> %vaddhn1.i to <4 x i16> 1034 ret <4 x i16> %vaddhn2.i 1035} 1036 1037define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { 1038; CHECK-LABEL: 
test_vaddhn_u64: 1039; CHECK: // %bb.0: // %entry 1040; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d 1041; CHECK-NEXT: ret 1042entry: 1043 %vaddhn.i = add <2 x i64> %a, %b 1044 %vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32> 1045 %vaddhn2.i = trunc <2 x i64> %vaddhn1.i to <2 x i32> 1046 ret <2 x i32> %vaddhn2.i 1047} 1048 1049define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1050; CHECK-LABEL: test_vaddhn_high_s16: 1051; CHECK: // %bb.0: // %entry 1052; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1053; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h 1054; CHECK-NEXT: ret 1055entry: 1056 %vaddhn.i.i = add <8 x i16> %a, %b 1057 %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1058 %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> 1059 %0 = bitcast <8 x i8> %r to <1 x i64> 1060 %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> 1061 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1062 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1063 ret <16 x i8> %2 1064} 1065 1066define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1067; CHECK-LABEL: test_vaddhn_high_s32: 1068; CHECK: // %bb.0: // %entry 1069; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1070; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s 1071; CHECK-NEXT: ret 1072entry: 1073 %vaddhn.i.i = add <4 x i32> %a, %b 1074 %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> 1075 %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> 1076 %0 = bitcast <4 x i16> %r to <1 x i64> 1077 %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> 1078 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1079 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1080 ret <8 x i16> %2 1081} 1082 1083define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1084; CHECK-LABEL: test_vaddhn_high_s64: 1085; CHECK: // %bb.0: // 
%entry 1086; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1087; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d 1088; CHECK-NEXT: ret 1089entry: 1090 %vaddhn.i.i = add <2 x i64> %a, %b 1091 %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> 1092 %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> 1093 %0 = bitcast <2 x i32> %r to <1 x i64> 1094 %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> 1095 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1096 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1097 ret <4 x i32> %2 1098} 1099 1100define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1101; CHECK-LABEL: test_vaddhn_high_u16: 1102; CHECK: // %bb.0: // %entry 1103; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1104; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h 1105; CHECK-NEXT: ret 1106entry: 1107 %vaddhn.i.i = add <8 x i16> %a, %b 1108 %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1109 %vaddhn2.i.i = trunc <8 x i16> %vaddhn1.i.i to <8 x i8> 1110 %0 = bitcast <8 x i8> %r to <1 x i64> 1111 %1 = bitcast <8 x i8> %vaddhn2.i.i to <1 x i64> 1112 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1113 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1114 ret <16 x i8> %2 1115} 1116 1117define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1118; CHECK-LABEL: test_vaddhn_high_u32: 1119; CHECK: // %bb.0: // %entry 1120; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1121; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s 1122; CHECK-NEXT: ret 1123entry: 1124 %vaddhn.i.i = add <4 x i32> %a, %b 1125 %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16> 1126 %vaddhn2.i.i = trunc <4 x i32> %vaddhn1.i.i to <4 x i16> 1127 %0 = bitcast <4 x i16> %r to <1 x i64> 1128 %1 = bitcast <4 x i16> %vaddhn2.i.i to <1 x i64> 1129 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 
0, i32 1> 1130 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1131 ret <8 x i16> %2 1132} 1133 1134define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1135; CHECK-LABEL: test_vaddhn_high_u64: 1136; CHECK: // %bb.0: // %entry 1137; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1138; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d 1139; CHECK-NEXT: ret 1140entry: 1141 %vaddhn.i.i = add <2 x i64> %a, %b 1142 %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32> 1143 %vaddhn2.i.i = trunc <2 x i64> %vaddhn1.i.i to <2 x i32> 1144 %0 = bitcast <2 x i32> %r to <1 x i64> 1145 %1 = bitcast <2 x i32> %vaddhn2.i.i to <1 x i64> 1146 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1147 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1148 ret <4 x i32> %2 1149} 1150 1151define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) { 1152; CHECK-LABEL: test_vraddhn_s16: 1153; CHECK: // %bb.0: // %entry 1154; CHECK-NEXT: raddhn v0.8b, v0.8h, v1.8h 1155; CHECK-NEXT: ret 1156entry: 1157 %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1158 ret <8 x i8> %vraddhn2.i 1159} 1160 1161define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) { 1162; CHECK-LABEL: test_vraddhn_s32: 1163; CHECK: // %bb.0: // %entry 1164; CHECK-NEXT: raddhn v0.4h, v0.4s, v1.4s 1165; CHECK-NEXT: ret 1166entry: 1167 %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1168 ret <4 x i16> %vraddhn2.i 1169} 1170 1171define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) { 1172; CHECK-LABEL: test_vraddhn_s64: 1173; CHECK: // %bb.0: // %entry 1174; CHECK-NEXT: raddhn v0.2s, v0.2d, v1.2d 1175; CHECK-NEXT: ret 1176entry: 1177 %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1178 ret <2 x i32> %vraddhn2.i 1179} 1180 1181define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) { 1182; CHECK-LABEL: 
test_vraddhn_u16: 1183; CHECK: // %bb.0: // %entry 1184; CHECK-NEXT: raddhn v0.8b, v0.8h, v1.8h 1185; CHECK-NEXT: ret 1186entry: 1187 %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1188 ret <8 x i8> %vraddhn2.i 1189} 1190 1191define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) { 1192; CHECK-LABEL: test_vraddhn_u32: 1193; CHECK: // %bb.0: // %entry 1194; CHECK-NEXT: raddhn v0.4h, v0.4s, v1.4s 1195; CHECK-NEXT: ret 1196entry: 1197 %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1198 ret <4 x i16> %vraddhn2.i 1199} 1200 1201define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) { 1202; CHECK-LABEL: test_vraddhn_u64: 1203; CHECK: // %bb.0: // %entry 1204; CHECK-NEXT: raddhn v0.2s, v0.2d, v1.2d 1205; CHECK-NEXT: ret 1206entry: 1207 %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1208 ret <2 x i32> %vraddhn2.i 1209} 1210 1211define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1212; CHECK-LABEL: test_vraddhn_high_s16: 1213; CHECK: // %bb.0: // %entry 1214; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1215; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h 1216; CHECK-NEXT: ret 1217entry: 1218 %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1219 %0 = bitcast <8 x i8> %r to <1 x i64> 1220 %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> 1221 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1222 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1223 ret <16 x i8> %2 1224} 1225 1226define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1227; CHECK-LABEL: test_vraddhn_high_s32: 1228; CHECK: // %bb.0: // %entry 1229; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1230; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s 1231; CHECK-NEXT: ret 1232entry: 1233 %vraddhn2.i.i = tail call <4 x i16> 
@llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1234 %0 = bitcast <4 x i16> %r to <1 x i64> 1235 %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> 1236 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1237 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1238 ret <8 x i16> %2 1239} 1240 1241define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1242; CHECK-LABEL: test_vraddhn_high_s64: 1243; CHECK: // %bb.0: // %entry 1244; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1245; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d 1246; CHECK-NEXT: ret 1247entry: 1248 %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1249 %0 = bitcast <2 x i32> %r to <1 x i64> 1250 %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> 1251 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1252 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1253 ret <4 x i32> %2 1254} 1255 1256define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1257; CHECK-LABEL: test_vraddhn_high_u16: 1258; CHECK: // %bb.0: // %entry 1259; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1260; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h 1261; CHECK-NEXT: ret 1262entry: 1263 %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1264 %0 = bitcast <8 x i8> %r to <1 x i64> 1265 %1 = bitcast <8 x i8> %vraddhn2.i.i to <1 x i64> 1266 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1267 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1268 ret <16 x i8> %2 1269} 1270 1271define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1272; CHECK-LABEL: test_vraddhn_high_u32: 1273; CHECK: // %bb.0: // %entry 1274; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1275; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s 1276; CHECK-NEXT: ret 1277entry: 1278 %vraddhn2.i.i = tail call <4 
x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1279 %0 = bitcast <4 x i16> %r to <1 x i64> 1280 %1 = bitcast <4 x i16> %vraddhn2.i.i to <1 x i64> 1281 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1282 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1283 ret <8 x i16> %2 1284} 1285 1286define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1287; CHECK-LABEL: test_vraddhn_high_u64: 1288; CHECK: // %bb.0: // %entry 1289; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1290; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d 1291; CHECK-NEXT: ret 1292entry: 1293 %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1294 %0 = bitcast <2 x i32> %r to <1 x i64> 1295 %1 = bitcast <2 x i32> %vraddhn2.i.i to <1 x i64> 1296 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1297 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1298 ret <4 x i32> %2 1299} 1300 1301define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { 1302; CHECK-LABEL: test_vsubhn_s16: 1303; CHECK: // %bb.0: // %entry 1304; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h 1305; CHECK-NEXT: ret 1306entry: 1307 %vsubhn.i = sub <8 x i16> %a, %b 1308 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1309 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> 1310 ret <8 x i8> %vsubhn2.i 1311} 1312 1313define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { 1314; CHECK-LABEL: test_vsubhn_s32: 1315; CHECK: // %bb.0: // %entry 1316; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s 1317; CHECK-NEXT: ret 1318entry: 1319 %vsubhn.i = sub <4 x i32> %a, %b 1320 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> 1321 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> 1322 ret <4 x i16> %vsubhn2.i 1323} 1324 1325define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { 1326; CHECK-LABEL: test_vsubhn_s64: 1327; 
CHECK: // %bb.0: // %entry 1328; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d 1329; CHECK-NEXT: ret 1330entry: 1331 %vsubhn.i = sub <2 x i64> %a, %b 1332 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> 1333 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> 1334 ret <2 x i32> %vsubhn2.i 1335} 1336 1337define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { 1338; CHECK-LABEL: test_vsubhn_u16: 1339; CHECK: // %bb.0: // %entry 1340; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h 1341; CHECK-NEXT: ret 1342entry: 1343 %vsubhn.i = sub <8 x i16> %a, %b 1344 %vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1345 %vsubhn2.i = trunc <8 x i16> %vsubhn1.i to <8 x i8> 1346 ret <8 x i8> %vsubhn2.i 1347} 1348 1349define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { 1350; CHECK-LABEL: test_vsubhn_u32: 1351; CHECK: // %bb.0: // %entry 1352; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s 1353; CHECK-NEXT: ret 1354entry: 1355 %vsubhn.i = sub <4 x i32> %a, %b 1356 %vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16> 1357 %vsubhn2.i = trunc <4 x i32> %vsubhn1.i to <4 x i16> 1358 ret <4 x i16> %vsubhn2.i 1359} 1360 1361define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { 1362; CHECK-LABEL: test_vsubhn_u64: 1363; CHECK: // %bb.0: // %entry 1364; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d 1365; CHECK-NEXT: ret 1366entry: 1367 %vsubhn.i = sub <2 x i64> %a, %b 1368 %vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32> 1369 %vsubhn2.i = trunc <2 x i64> %vsubhn1.i to <2 x i32> 1370 ret <2 x i32> %vsubhn2.i 1371} 1372 1373define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1374; CHECK-LABEL: test_vsubhn_high_s16: 1375; CHECK: // %bb.0: // %entry 1376; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1377; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h 1378; CHECK-NEXT: ret 1379entry: 1380 %vsubhn.i.i = sub <8 x i16> %a, %b 1381 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 
8, i16 8, i16 8, i16 8, i16 8> 1382 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> 1383 %0 = bitcast <8 x i8> %r to <1 x i64> 1384 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> 1385 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1386 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1387 ret <16 x i8> %2 1388} 1389 1390define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1391; CHECK-LABEL: test_vsubhn_high_s32: 1392; CHECK: // %bb.0: // %entry 1393; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1394; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s 1395; CHECK-NEXT: ret 1396entry: 1397 %vsubhn.i.i = sub <4 x i32> %a, %b 1398 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> 1399 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> 1400 %0 = bitcast <4 x i16> %r to <1 x i64> 1401 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> 1402 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1403 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1404 ret <8 x i16> %2 1405} 1406 1407define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1408; CHECK-LABEL: test_vsubhn_high_s64: 1409; CHECK: // %bb.0: // %entry 1410; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1411; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d 1412; CHECK-NEXT: ret 1413entry: 1414 %vsubhn.i.i = sub <2 x i64> %a, %b 1415 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> 1416 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> 1417 %0 = bitcast <2 x i32> %r to <1 x i64> 1418 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> 1419 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1420 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1421 ret <4 x i32> %2 1422} 1423 1424define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1425; CHECK-LABEL: test_vsubhn_high_u16: 1426; CHECK: // %bb.0: // 
%entry 1427; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1428; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h 1429; CHECK-NEXT: ret 1430entry: 1431 %vsubhn.i.i = sub <8 x i16> %a, %b 1432 %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1433 %vsubhn2.i.i = trunc <8 x i16> %vsubhn1.i.i to <8 x i8> 1434 %0 = bitcast <8 x i8> %r to <1 x i64> 1435 %1 = bitcast <8 x i8> %vsubhn2.i.i to <1 x i64> 1436 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1437 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1438 ret <16 x i8> %2 1439} 1440 1441define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1442; CHECK-LABEL: test_vsubhn_high_u32: 1443; CHECK: // %bb.0: // %entry 1444; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1445; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s 1446; CHECK-NEXT: ret 1447entry: 1448 %vsubhn.i.i = sub <4 x i32> %a, %b 1449 %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16> 1450 %vsubhn2.i.i = trunc <4 x i32> %vsubhn1.i.i to <4 x i16> 1451 %0 = bitcast <4 x i16> %r to <1 x i64> 1452 %1 = bitcast <4 x i16> %vsubhn2.i.i to <1 x i64> 1453 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1454 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1455 ret <8 x i16> %2 1456} 1457 1458define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1459; CHECK-LABEL: test_vsubhn_high_u64: 1460; CHECK: // %bb.0: // %entry 1461; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1462; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d 1463; CHECK-NEXT: ret 1464entry: 1465 %vsubhn.i.i = sub <2 x i64> %a, %b 1466 %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32> 1467 %vsubhn2.i.i = trunc <2 x i64> %vsubhn1.i.i to <2 x i32> 1468 %0 = bitcast <2 x i32> %r to <1 x i64> 1469 %1 = bitcast <2 x i32> %vsubhn2.i.i to <1 x i64> 1470 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 
0, i32 1> 1471 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1472 ret <4 x i32> %2 1473} 1474 1475define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) { 1476; CHECK-LABEL: test_vrsubhn_s16: 1477; CHECK: // %bb.0: // %entry 1478; CHECK-NEXT: rsubhn v0.8b, v0.8h, v1.8h 1479; CHECK-NEXT: ret 1480entry: 1481 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1482 ret <8 x i8> %vrsubhn2.i 1483} 1484 1485define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) { 1486; CHECK-LABEL: test_vrsubhn_s32: 1487; CHECK: // %bb.0: // %entry 1488; CHECK-NEXT: rsubhn v0.4h, v0.4s, v1.4s 1489; CHECK-NEXT: ret 1490entry: 1491 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1492 ret <4 x i16> %vrsubhn2.i 1493} 1494 1495define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) { 1496; CHECK-LABEL: test_vrsubhn_s64: 1497; CHECK: // %bb.0: // %entry 1498; CHECK-NEXT: rsubhn v0.2s, v0.2d, v1.2d 1499; CHECK-NEXT: ret 1500entry: 1501 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1502 ret <2 x i32> %vrsubhn2.i 1503} 1504 1505define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) { 1506; CHECK-LABEL: test_vrsubhn_u16: 1507; CHECK: // %bb.0: // %entry 1508; CHECK-NEXT: rsubhn v0.8b, v0.8h, v1.8h 1509; CHECK-NEXT: ret 1510entry: 1511 %vrsubhn2.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1512 ret <8 x i8> %vrsubhn2.i 1513} 1514 1515define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) { 1516; CHECK-LABEL: test_vrsubhn_u32: 1517; CHECK: // %bb.0: // %entry 1518; CHECK-NEXT: rsubhn v0.4h, v0.4s, v1.4s 1519; CHECK-NEXT: ret 1520entry: 1521 %vrsubhn2.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1522 ret <4 x i16> %vrsubhn2.i 1523} 1524 1525define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) { 1526; CHECK-LABEL: test_vrsubhn_u64: 1527; CHECK: 
// %bb.0: // %entry 1528; CHECK-NEXT: rsubhn v0.2s, v0.2d, v1.2d 1529; CHECK-NEXT: ret 1530entry: 1531 %vrsubhn2.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1532 ret <2 x i32> %vrsubhn2.i 1533} 1534 1535define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1536; CHECK-LABEL: test_vrsubhn_high_s16: 1537; CHECK: // %bb.0: // %entry 1538; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1539; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h 1540; CHECK-NEXT: ret 1541entry: 1542 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1543 %0 = bitcast <8 x i8> %r to <1 x i64> 1544 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> 1545 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1546 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1547 ret <16 x i8> %2 1548} 1549 1550define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1551; CHECK-LABEL: test_vrsubhn_high_s32: 1552; CHECK: // %bb.0: // %entry 1553; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1554; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s 1555; CHECK-NEXT: ret 1556entry: 1557 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1558 %0 = bitcast <4 x i16> %r to <1 x i64> 1559 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> 1560 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1561 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1562 ret <8 x i16> %2 1563} 1564 1565define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1566; CHECK-LABEL: test_vrsubhn_high_s64: 1567; CHECK: // %bb.0: // %entry 1568; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1569; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d 1570; CHECK-NEXT: ret 1571entry: 1572 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1573 %0 = bitcast <2 x i32> 
%r to <1 x i64> 1574 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> 1575 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1576 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1577 ret <4 x i32> %2 1578} 1579 1580define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { 1581; CHECK-LABEL: test_vrsubhn_high_u16: 1582; CHECK: // %bb.0: // %entry 1583; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1584; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h 1585; CHECK-NEXT: ret 1586entry: 1587 %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 1588 %0 = bitcast <8 x i8> %r to <1 x i64> 1589 %1 = bitcast <8 x i8> %vrsubhn2.i.i to <1 x i64> 1590 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1591 %2 = bitcast <2 x i64> %shuffle.i.i to <16 x i8> 1592 ret <16 x i8> %2 1593} 1594 1595define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { 1596; CHECK-LABEL: test_vrsubhn_high_u32: 1597; CHECK: // %bb.0: // %entry 1598; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1599; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s 1600; CHECK-NEXT: ret 1601entry: 1602 %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 1603 %0 = bitcast <4 x i16> %r to <1 x i64> 1604 %1 = bitcast <4 x i16> %vrsubhn2.i.i to <1 x i64> 1605 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1606 %2 = bitcast <2 x i64> %shuffle.i.i to <8 x i16> 1607 ret <8 x i16> %2 1608} 1609 1610define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { 1611; CHECK-LABEL: test_vrsubhn_high_u64: 1612; CHECK: // %bb.0: // %entry 1613; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1614; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d 1615; CHECK-NEXT: ret 1616entry: 1617 %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 1618 %0 = bitcast <2 
x i32> %r to <1 x i64> 1619 %1 = bitcast <2 x i32> %vrsubhn2.i.i to <1 x i64> 1620 %shuffle.i.i = shufflevector <1 x i64> %0, <1 x i64> %1, <2 x i32> <i32 0, i32 1> 1621 %2 = bitcast <2 x i64> %shuffle.i.i to <4 x i32> 1622 ret <4 x i32> %2 1623} 1624 1625define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) { 1626; CHECK-LABEL: test_vabdl_s8: 1627; CHECK: // %bb.0: // %entry 1628; CHECK-NEXT: sabdl v0.8h, v0.8b, v1.8b 1629; CHECK-NEXT: ret 1630entry: 1631 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) 1632 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> 1633 ret <8 x i16> %vmovl.i.i 1634} 1635 1636define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) { 1637; CHECK-LABEL: test_vabdl_s16: 1638; CHECK: // %bb.0: // %entry 1639; CHECK-NEXT: sabdl v0.4s, v0.4h, v1.4h 1640; CHECK-NEXT: ret 1641entry: 1642 %vabd2.i.i = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) 1643 %vmovl.i.i = zext <4 x i16> %vabd2.i.i to <4 x i32> 1644 ret <4 x i32> %vmovl.i.i 1645} 1646 1647define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) { 1648; CHECK-LABEL: test_vabdl_s32: 1649; CHECK: // %bb.0: // %entry 1650; CHECK-NEXT: sabdl v0.2d, v0.2s, v1.2s 1651; CHECK-NEXT: ret 1652entry: 1653 %vabd2.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) 1654 %vmovl.i.i = zext <2 x i32> %vabd2.i.i to <2 x i64> 1655 ret <2 x i64> %vmovl.i.i 1656} 1657 1658define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) { 1659; CHECK-LABEL: test_vabdl_u8: 1660; CHECK: // %bb.0: // %entry 1661; CHECK-NEXT: uabdl v0.8h, v0.8b, v1.8b 1662; CHECK-NEXT: ret 1663entry: 1664 %vabd.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) 1665 %vmovl.i.i = zext <8 x i8> %vabd.i.i to <8 x i16> 1666 ret <8 x i16> %vmovl.i.i 1667} 1668 1669define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) { 1670; CHECK-LABEL: test_vabdl_u16: 1671; CHECK: // %bb.0: // %entry 1672; 
CHECK-NEXT: uabdl v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
; uabd.v4i16 of %a and %b, zero-extended to <4 x i32>.
entry:
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  ret <4 x i32> %widened
}

; uabd.v2i32 of %a and %b, zero-extended to <2 x i64>.
; The assertions expect a single uabdl.
define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vabdl_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabdl v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
  %widened = zext <2 x i32> %absdiff to <2 x i64>
  ret <2 x i64> %widened
}

; sabd.v8i8 of %b and %c, zero-extended and added to %a.
; The assertions expect a single sabal.
define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vabal_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabal v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
  %widened = zext <8 x i8> %absdiff to <8 x i16>
  %accum = add <8 x i16> %widened, %a
  ret <8 x i16> %accum
}

; sabd.v4i16 of %b and %c, zero-extended and added to %a.
; The assertions expect a single sabal.
define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vabal_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabal v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  %accum = add <4 x i32> %widened, %a
  ret <4 x i32> %accum
}

define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vabal_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabal v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
  %vabd2.i.i.i = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
  %vmovl.i.i.i = zext <2 x i32> %vabd2.i.i.i to <2 x i64>
  %add.i = add <2
x i64> %vmovl.i.i.i, %a
  ret <2 x i64> %add.i
}

; uabd.v8i8 of %b and %c, zero-extended and added to %a.
; The assertions expect a single uabal.
define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vabal_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabal v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
  %widened = zext <8 x i8> %absdiff to <8 x i16>
  %accum = add <8 x i16> %widened, %a
  ret <8 x i16> %accum
}

; uabd.v4i16 of %b and %c, zero-extended and added to %a.
; The assertions expect a single uabal.
define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vabal_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabal v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  %accum = add <4 x i32> %widened, %a
  ret <4 x i32> %accum
}

; uabd.v2i32 of %b and %c, zero-extended and added to %a.
; The assertions expect a single uabal.
define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vabal_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabal v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
  %absdiff = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
  %widened = zext <2 x i32> %absdiff to <2 x i64>
  %accum = add <2 x i64> %widened, %a
  ret <2 x i64> %accum
}

define <8 x i16> @test_vabdl_high_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vabdl_high_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabdl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i = tail call <8 x i8>
@llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
  ret <8 x i16> %vmovl.i.i.i
}

; sabd.v4i16 of elements 4-7 of %a and %b, zero-extended to <4 x i32>.
; The assertions expect a single sabdl2.
define <4 x i32> @test_vabdl_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vabdl_high_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabdl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a.hi, <4 x i16> %b.hi)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  ret <4 x i32> %widened
}

; sabd.v2i32 of elements 2-3 of %a and %b, zero-extended to <2 x i64>.
; The assertions expect a single sabdl2.
define <2 x i64> @test_vabdl_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vabdl_high_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabdl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %absdiff = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a.hi, <2 x i32> %b.hi)
  %widened = zext <2 x i32> %absdiff to <2 x i64>
  ret <2 x i64> %widened
}

define <8 x i16> @test_vabdl_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vabdl_high_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabdl2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i = tail call <8 x i8>
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i = zext <8 x i8> %vabd.i.i.i to <8 x i16>
  ret <8 x i16> %vmovl.i.i.i
}

; uabd.v4i16 of elements 4-7 of %a and %b, zero-extended to <4 x i32>.
; The assertions expect a single uabdl2.
define <4 x i32> @test_vabdl_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vabdl_high_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabdl2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a.hi, <4 x i16> %b.hi)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  ret <4 x i32> %widened
}

; uabd.v2i32 of elements 2-3 of %a and %b, zero-extended to <2 x i64>.
; The assertions expect a single uabdl2.
define <2 x i64> @test_vabdl_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vabdl_high_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabdl2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %absdiff = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a.hi, <2 x i32> %b.hi)
  %widened = zext <2 x i32> %absdiff to <2 x i64>
  ret <2 x i64> %widened
}

define <8 x i16> @test_vabal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vabal_high_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabal2 v0.8h, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i.i = tail
call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
  ret <8 x i16> %add.i.i
}

; sabd.v4i16 of elements 4-7 of %b and %c, zero-extended and added to %a.
; The assertions expect a single sabal2.
define <4 x i32> @test_vabal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vabal_high_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabal2 v0.4s, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  %accum = add <4 x i32> %widened, %a
  ret <4 x i32> %accum
}

; sabd.v2i32 of elements 2-3 of %b and %c, zero-extended and added to %a.
; The assertions expect a single sabal2.
define <2 x i64> @test_vabal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vabal_high_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sabal2 v0.2d, v1.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %absdiff = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %widened = zext <2 x i32> %absdiff to <2 x i64>
  %accum = add <2 x i64> %widened, %a
  ret <2 x i64> %accum
}

define <8 x i16> @test_vabal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vabal_high_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabal2 v0.8h, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %shuffle.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32
12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vabd.i.i.i.i = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  %vmovl.i.i.i.i = zext <8 x i8> %vabd.i.i.i.i to <8 x i16>
  %add.i.i = add <8 x i16> %vmovl.i.i.i.i, %a
  ret <8 x i16> %add.i.i
}

; uabd.v4i16 of elements 4-7 of %b and %c, zero-extended and added to %a.
; The assertions expect a single uabal2.
define <4 x i32> @test_vabal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vabal_high_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabal2 v0.4s, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %absdiff = tail call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %widened = zext <4 x i16> %absdiff to <4 x i32>
  %accum = add <4 x i32> %widened, %a
  ret <4 x i32> %accum
}

; uabd.v2i32 of elements 2-3 of %b and %c, zero-extended and added to %a.
; The assertions expect a single uabal2.
define <2 x i64> @test_vabal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vabal_high_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uabal2 v0.2d, v1.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %absdiff = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %widened = zext <2 x i32> %absdiff to <2 x i64>
  %accum = add <2 x i64> %widened, %a
  ret <2 x i64> %accum
}

define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT:
smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: ret
; Direct call to smull.v8i16 on %a and %b.
entry:
  %product = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %product
}

; Direct call to smull.v4i32 on %a and %b.
; The assertions expect a single smull.
define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vmull_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
  %product = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %product
}

; Direct call to smull.v2i64 on %a and %b.
; The assertions expect a single smull.
define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vmull_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
  %product = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %product
}

; Direct call to umull.v8i16 on %a and %b.
; The assertions expect a single umull.
define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vmull_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
  %product = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i16> %product
}

; Direct call to umull.v4i32 on %a and %b.
; The assertions expect a single umull.
define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vmull_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
  %product = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %product
}

; Direct call to umull.v2i64 on %a and %b.
; The assertions expect a single umull.
define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vmull_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
  %product = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %product
}

define <8 x i16> @test_vmull_high_s8(<16 x i8> %a, <16 x i8> %b) {
;
CHECK-LABEL: test_vmull_high_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT: ret
; smull.v8i16 of elements 8-15 of %a and %b.
entry:
  %a.hi = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %product = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a.hi, <8 x i8> %b.hi)
  ret <8 x i16> %product
}

; smull.v4i32 of elements 4-7 of %a and %b.
; The assertions expect a single smull2.
define <4 x i32> @test_vmull_high_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %product = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a.hi, <4 x i16> %b.hi)
  ret <4 x i32> %product
}

; smull.v2i64 of elements 2-3 of %a and %b.
; The assertions expect a single smull2.
define <2 x i64> @test_vmull_high_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %product = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a.hi, <2 x i32> %b.hi)
  ret <2 x i64> %product
}

define <8 x i16> @test_vmull_high_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmull_high_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull2 v0.8h, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %shuffle.i.i = shufflevector <16
x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i)
  ret <8 x i16> %vmull.i.i
}

; umull.v4i32 of elements 4-7 of %a and %b.
; The assertions expect a single umull2.
define <4 x i32> @test_vmull_high_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmull_high_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %product = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a.hi, <4 x i16> %b.hi)
  ret <4 x i32> %product
}

; umull.v2i64 of elements 2-3 of %a and %b.
; The assertions expect a single umull2.
define <2 x i64> @test_vmull_high_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmull_high_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %a.hi = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %product = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a.hi, <2 x i32> %b.hi)
  ret <2 x i64> %product
}

; smull.v8i16 of %b and %c, added to %a.
; The assertions expect a single smlal.
define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlal v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
entry:
  %product = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %accum = add <8 x i16> %product, %a
  ret <8 x i16> %accum
}

define <4 x i32> @test_vmlal_s16(<4 x i32>
%a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
; smull.v4i32 of %b and %c, added to %a.
entry:
  %product = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %accum = add <4 x i32> %product, %a
  ret <4 x i32> %accum
}

; smull.v2i64 of %b and %c, added to %a.
; The assertions expect a single smlal.
define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
  %product = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %accum = add <2 x i64> %product, %a
  ret <2 x i64> %accum
}

; umull.v8i16 of %b and %c, added to %a.
; The assertions expect a single umlal.
define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlal_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlal v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
entry:
  %product = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %accum = add <8 x i16> %product, %a
  ret <8 x i16> %accum
}

; umull.v4i32 of %b and %c, added to %a.
; The assertions expect a single umlal.
define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlal_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
  %product = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %accum = add <4 x i32> %product, %a
  ret <4 x i32> %accum
}

; umull.v2i64 of %b and %c, added to %a.
; The assertions expect a single umlal.
define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlal_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
  %product = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %accum = add <2 x i64> %product, %a
  ret <2 x i64> %accum
}

define <8 x i16>
@test_vmlal_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlal2 v0.8h, v1.16b, v2.16b
; CHECK-NEXT: ret
; smull.v8i16 of elements 8-15 of %b and %c, added to %a.
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %product = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %accum = add <8 x i16> %product, %a
  ret <8 x i16> %accum
}

; smull.v4i32 of elements 4-7 of %b and %c, added to %a.
; The assertions expect a single smlal2.
define <4 x i32> @test_vmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %product = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %accum = add <4 x i32> %product, %a
  ret <4 x i32> %accum
}

; smull.v2i64 of elements 2-3 of %b and %c, added to %a.
; The assertions expect a single smlal2.
define <2 x i64> @test_vmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %product = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %accum = add <2 x i64> %product, %a
  ret <2 x i64> %accum
}

define <8 x i16>
@test_vmlal_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlal_high_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlal2 v0.8h, v1.16b, v2.16b
; CHECK-NEXT: ret
; umull.v8i16 of elements 8-15 of %b and %c, added to %a.
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %product = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %accum = add <8 x i16> %product, %a
  ret <8 x i16> %accum
}

; umull.v4i32 of elements 4-7 of %b and %c, added to %a.
; The assertions expect a single umlal2.
define <4 x i32> @test_vmlal_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlal_high_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %product = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %accum = add <4 x i32> %product, %a
  ret <4 x i32> %accum
}

; umull.v2i64 of elements 2-3 of %b and %c, added to %a.
; The assertions expect a single umlal2.
define <2 x i64> @test_vmlal_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlal_high_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %product = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %accum = add <2 x i64> %product, %a
  ret <2 x i64> %accum
}

define <8 x i16>
@test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlsl v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
; smull.v8i16 of %b and %c, subtracted from %a.
entry:
  %product = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %remainder = sub <8 x i16> %a, %product
  ret <8 x i16> %remainder
}

; smull.v4i32 of %b and %c, subtracted from %a.
; The assertions expect a single smlsl.
define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
  %product = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %remainder = sub <4 x i32> %a, %product
  ret <4 x i32> %remainder
}

; smull.v2i64 of %b and %c, subtracted from %a.
; The assertions expect a single smlsl.
define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
  %product = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %remainder = sub <2 x i64> %a, %product
  ret <2 x i64> %remainder
}

; umull.v8i16 of %b and %c, subtracted from %a.
; The assertions expect a single umlsl.
define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: test_vmlsl_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlsl v0.8h, v1.8b, v2.8b
; CHECK-NEXT: ret
entry:
  %product = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
  %remainder = sub <8 x i16> %a, %product
  ret <8 x i16> %remainder
}

; umull.v4i32 of %b and %c, subtracted from %a.
; The assertions expect a single umlsl.
define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vmlsl_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
  %product = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %remainder = sub <4 x i32> %a, %product
  ret <4 x i32> %remainder
}

define
<2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vmlsl_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
; umull.v2i64 of %b and %c, subtracted from %a.
entry:
  %product = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %remainder = sub <2 x i64> %a, %product
  ret <2 x i64> %remainder
}

; smull.v8i16 of elements 8-15 of %b and %c, subtracted from %a.
; The assertions expect a single smlsl2.
define <8 x i16> @test_vmlsl_high_s8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_s8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlsl2 v0.8h, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %product = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %remainder = sub <8 x i16> %a, %product
  ret <8 x i16> %remainder
}

; smull.v4i32 of elements 4-7 of %b and %c, subtracted from %a.
; The assertions expect a single smlsl2.
define <4 x i32> @test_vmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %product = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %remainder = sub <4 x i32> %a, %product
  ret <4 x i32> %remainder
}

define <2 x i64> @test_vmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.4s
; CHECK-NEXT: ret
entry:
  ; smull.v2i64 of elements 2-3 of %b and %c, subtracted from %a.
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %product = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %remainder = sub <2 x i64> %a, %product
  ret <2 x i64> %remainder
}

; umull.v8i16 of elements 8-15 of %b and %c, subtracted from %a.
; The assertions expect a single umlsl2.
define <8 x i16> @test_vmlsl_high_u8(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsl_high_u8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlsl2 v0.8h, v1.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %c.hi = shufflevector <16 x i8> %c, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %product = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b.hi, <8 x i8> %c.hi)
  %remainder = sub <8 x i16> %a, %product
  ret <8 x i16> %remainder
}

; umull.v4i32 of elements 4-7 of %b and %c, subtracted from %a.
; The assertions expect a single umlsl2.
define <4 x i32> @test_vmlsl_high_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsl_high_u16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.8h
; CHECK-NEXT: ret
entry:
  %b.hi = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %c.hi = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %product = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b.hi, <4 x i16> %c.hi)
  %remainder = sub <4 x i32> %a, %product
  ret <4 x i32> %remainder
}

define <2 x i64> @test_vmlsl_high_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsl_high_u32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.4s
; CHECK-NEXT: ret
entry:
  ; umull.v2i64 of elements 2-3 of %b and %c, subtracted from %a.
  %b.hi = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %c.hi = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %product = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b.hi, <2 x i32> %c.hi)
  %remainder = sub <2 x i64> %a, %product
  ret <2 x i64> %remainder
}

; Direct call to sqdmull.v4i32 on %a and %b.
; The assertions expect a single sqdmull.
define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vqdmull_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i32> %dprod
}

; Direct call to sqdmull.v2i64 on %a and %b.
; The assertions expect a single sqdmull.
define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vqdmull_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
  %dprod = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i64> %dprod
}

; sqdmull.v4i32 of %b and %c, combined with %a through sqadd.v4i32.
; The assertions expect a single sqdmlal.
define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_vqdmlal_s16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.4h
; CHECK-NEXT: ret
entry:
  %dprod = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
  %accum = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %dprod)
  ret <4 x i32> %accum
}

define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: test_vqdmlal_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.2s
; CHECK-NEXT: ret
entry:
  %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
  %vqdmlal4.i = tail call <2 x i64>
@llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i) 2384 ret <2 x i64> %vqdmlal4.i 2385} 2386 2387define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) { 2388; CHECK-LABEL: test_vqdmlsl_s16: 2389; CHECK: // %bb.0: // %entry 2390; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.4h 2391; CHECK-NEXT: ret 2392entry: 2393 %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) 2394 %vqdmlsl4.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i) 2395 ret <4 x i32> %vqdmlsl4.i 2396} 2397 2398define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) { 2399; CHECK-LABEL: test_vqdmlsl_s32: 2400; CHECK: // %bb.0: // %entry 2401; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.2s 2402; CHECK-NEXT: ret 2403entry: 2404 %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) 2405 %vqdmlsl4.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i) 2406 ret <2 x i64> %vqdmlsl4.i 2407} 2408 2409define <4 x i32> @test_vqdmull_high_s16(<8 x i16> %a, <8 x i16> %b) { 2410; CHECK-LABEL: test_vqdmull_high_s16: 2411; CHECK: // %bb.0: // %entry 2412; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.8h 2413; CHECK-NEXT: ret 2414entry: 2415 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2416 %shuffle.i3.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2417 %vqdmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 2418 ret <4 x i32> %vqdmull2.i.i 2419} 2420 2421define <2 x i64> @test_vqdmull_high_s32(<4 x i32> %a, <4 x i32> %b) { 2422; CHECK-LABEL: test_vqdmull_high_s32: 2423; CHECK: // %bb.0: // %entry 2424; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.4s 2425; CHECK-NEXT: ret 2426entry: 2427 %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 
2428 %shuffle.i3.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2429 %vqdmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 2430 ret <2 x i64> %vqdmull2.i.i 2431} 2432 2433define <4 x i32> @test_vqdmlal_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { 2434; CHECK-LABEL: test_vqdmlal_high_s16: 2435; CHECK: // %bb.0: // %entry 2436; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h 2437; CHECK-NEXT: ret 2438entry: 2439 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2440 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2441 %vqdmlal2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 2442 %vqdmlal4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal2.i.i) 2443 ret <4 x i32> %vqdmlal4.i.i 2444} 2445 2446define <2 x i64> @test_vqdmlal_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { 2447; CHECK-LABEL: test_vqdmlal_high_s32: 2448; CHECK: // %bb.0: // %entry 2449; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.4s 2450; CHECK-NEXT: ret 2451entry: 2452 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2453 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2454 %vqdmlal2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 2455 %vqdmlal4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal2.i.i) 2456 ret <2 x i64> %vqdmlal4.i.i 2457} 2458 2459define <4 x i32> @test_vqdmlsl_high_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) { 2460; CHECK-LABEL: test_vqdmlsl_high_s16: 2461; CHECK: // %bb.0: // %entry 2462; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.8h 2463; CHECK-NEXT: ret 2464entry: 2465 %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, 
<4 x i32> <i32 4, i32 5, i32 6, i32 7> 2466 %shuffle.i3.i = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2467 %vqdmlsl2.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %shuffle.i3.i) 2468 %vqdmlsl4.i.i = tail call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl2.i.i) 2469 ret <4 x i32> %vqdmlsl4.i.i 2470} 2471 2472define <2 x i64> @test_vqdmlsl_high_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) { 2473; CHECK-LABEL: test_vqdmlsl_high_s32: 2474; CHECK: // %bb.0: // %entry 2475; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.4s 2476; CHECK-NEXT: ret 2477entry: 2478 %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2479 %shuffle.i3.i = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 2480 %vqdmlsl2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %shuffle.i3.i) 2481 %vqdmlsl4.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl2.i.i) 2482 ret <2 x i64> %vqdmlsl4.i.i 2483} 2484 2485define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) { 2486; CHECK-LABEL: test_vmull_p8: 2487; CHECK: // %bb.0: // %entry 2488; CHECK-NEXT: pmull v0.8h, v0.8b, v1.8b 2489; CHECK-NEXT: ret 2490entry: 2491 %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) 2492 ret <8 x i16> %vmull.i 2493} 2494 2495define <8 x i16> @test_vmull_high_p8(<16 x i8> %a, <16 x i8> %b) { 2496; CHECK-LABEL: test_vmull_high_p8: 2497; CHECK: // %bb.0: // %entry 2498; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b 2499; CHECK-NEXT: ret 2500entry: 2501 %shuffle.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2502 %shuffle.i3.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2503 %vmull.i.i = tail call <8 x 
i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %shuffle.i.i, <8 x i8> %shuffle.i3.i) 2504 ret <8 x i16> %vmull.i.i 2505} 2506 2507define i128 @test_vmull_p64(i64 %a, i64 %b) #4 { 2508; CHECK-LABEL: test_vmull_p64: 2509; CHECK: // %bb.0: // %entry 2510; CHECK-NEXT: fmov d0, x0 2511; CHECK-NEXT: fmov d1, x1 2512; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d 2513; CHECK-NEXT: mov x1, v0.d[1] 2514; CHECK-NEXT: fmov x0, d0 2515; CHECK-NEXT: ret 2516entry: 2517 %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) 2518 %vmull3.i = bitcast <16 x i8> %vmull2.i to i128 2519 ret i128 %vmull3.i 2520} 2521 2522define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 { 2523; CHECK-LABEL: test_vmull_high_p64: 2524; CHECK: // %bb.0: // %entry 2525; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d 2526; CHECK-NEXT: mov x1, v0.d[1] 2527; CHECK-NEXT: fmov x0, d0 2528; CHECK-NEXT: ret 2529entry: 2530 %0 = extractelement <2 x i64> %a, i32 1 2531 %1 = extractelement <2 x i64> %b, i32 1 2532 %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %0, i64 %1) #1 2533 %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128 2534 ret i128 %vmull3.i.i 2535} 2536 2537 2538