; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s

%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
; CHECK-LABEL: ld2_8b
; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
; and from the argument of the function also defined by ABI (i.e., x0)
; CHECK: ld2.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %tmp2
}

define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
; CHECK-LABEL: ld3_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %tmp2
}

define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
; CHECK-LABEL: ld4_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %tmp2
}

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly

%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
; CHECK-LABEL: ld2_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
; CHECK-LABEL: ld3_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
; CHECK-LABEL: ld4_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly

%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
; CHECK-LABEL: ld2_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
; CHECK-LABEL: ld3_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
; CHECK-LABEL: ld4_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly

%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
; CHECK-LABEL: ld2_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
; CHECK-LABEL: ld3_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
; CHECK-LABEL: ld4_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly

%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
; CHECK-LABEL: ld2_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
; CHECK-LABEL: ld3_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
; CHECK-LABEL: ld4_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly

%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
; CHECK-LABEL: ld2_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
; CHECK-LABEL: ld3_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
; CHECK-LABEL: ld4_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly

%struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }

define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
; CHECK-LABEL: ld2_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
; CHECK-LABEL: ld3_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
; CHECK-LABEL: ld4_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly

%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }


define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld2_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t %tmp2
}

define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld3_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t %tmp2
}

define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld4_1di64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t %tmp2
}


declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly

%struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
%struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
%struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }


define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
; CHECK-LABEL: ld2_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x2_t %tmp2
}

define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
; CHECK-LABEL: ld3_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x3_t %tmp2
}

define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
; CHECK-LABEL: ld4_1df64
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x4_t %tmp2
}

declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly


define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_16b
; CHECK: ld2.b { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_16b
; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_16b
; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly

define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_8h
; CHECK: ld2.h { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_8h
; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_8h
; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly

define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_4s
; CHECK: ld2.s { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_4s
; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_4s
; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly

define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_2d
; CHECK: ld2.d { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_2d
; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_2d
; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly

define <8 x i8> @ld1r_8b(i8* %bar) {
; CHECK: ld1r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
  ret <8 x i8> %tmp9
}

define <16 x i8> @ld1r_16b(i8* %bar) {
; CHECK: ld1r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.16b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  ret <16 x i8> %tmp17
}

define <4 x i16> @ld1r_4h(i16* %bar) {
; CHECK: ld1r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
  ret <4 x i16> %tmp5
}

define <8 x i16> @ld1r_8h(i16* %bar) {
; CHECK: ld1r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
  ret <8 x i16> %tmp9
}

define <2 x i32> @ld1r_2s(i32* %bar) {
; CHECK: ld1r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
  ret <2 x i32> %tmp3
}

define <4 x i32> @ld1r_4s(i32* %bar) {
; CHECK: ld1r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
  ret <4 x i32> %tmp5
}

define <2 x i64> @ld1r_2d(i64* %bar) {
; CHECK: ld1r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i64, i64* %bar
  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
  ret <2 x i64> %tmp3
}

define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
; CHECK: ld2r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %tmp2
}

define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
; CHECK: ld3r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %tmp2
}

define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
; CHECK: ld4r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %tmp2
}

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly

define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
; CHECK: ld2r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %tmp2
}

define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
; CHECK: ld3r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %tmp2
}

define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
; CHECK: ld4r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %tmp2
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly

define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
; CHECK: ld2r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
; CHECK: ld3r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
; CHECK: ld4r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly

define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
; CHECK: ld2r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
; CHECK: ld3r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
; CHECK: ld4r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly

define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
; CHECK: ld2r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
; CHECK: ld3r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
; CHECK: ld4r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly

define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
; CHECK: ld2r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
; CHECK: ld3r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
; CHECK: ld4r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

; NOTE(review): the chunk was truncated at the original line 736 ("declare" with
; nothing after it). These declarations are required for the ld2r_4s/ld3r_4s/
; ld4r_4s calls above to be valid IR, so they are completed from those call
; sites — confirm against the full file.
declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
%struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly 737declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly 738declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly 739 740define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind { 741; CHECK: ld2r_1d 742; Make sure we are using the operands defined by the ABI 743; CHECK: ld2r.1d { v0, v1 }, [x0] 744; CHECK-NEXT: ret 745 %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) 746 ret %struct.__neon_int64x1x2_t %tmp2 747} 748 749define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind { 750; CHECK: ld3r_1d 751; Make sure we are using the operands defined by the ABI 752; CHECK: ld3r.1d { v0, v1, v2 }, [x0] 753; CHECK-NEXT: ret 754 %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) 755 ret %struct.__neon_int64x1x3_t %tmp2 756} 757 758define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind { 759; CHECK: ld4r_1d 760; Make sure we are using the operands defined by the ABI 761; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0] 762; CHECK-NEXT: ret 763 %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) 764 ret %struct.__neon_int64x1x4_t %tmp2 765} 766 767declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly 768declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly 769declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly 770 771define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind { 772; CHECK: ld2r_2d 773; Make sure we are using the operands defined by the ABI 774; CHECK: ld2r.2d { v0, v1 }, [x0] 775; CHECK-NEXT: ret 776 %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) 777 ret %struct.__neon_int64x2x2_t %tmp2 778} 779 780define 
%struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind { 781; CHECK: ld3r_2d 782; Make sure we are using the operands defined by the ABI 783; CHECK: ld3r.2d { v0, v1, v2 }, [x0] 784; CHECK-NEXT: ret 785 %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) 786 ret %struct.__neon_int64x2x3_t %tmp2 787} 788 789define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind { 790; CHECK: ld4r_2d 791; Make sure we are using the operands defined by the ABI 792; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0] 793; CHECK-NEXT: ret 794 %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) 795 ret %struct.__neon_int64x2x4_t %tmp2 796} 797 798declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly 799declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly 800declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly 801 802define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) { 803; CHECK-LABEL: ld1_16b 804; Make sure we are using the operands defined by the ABI 805; CHECK: ld1.b { v0 }[0], [x0] 806; CHECK-NEXT: ret 807 %tmp1 = load i8, i8* %bar 808 %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0 809 ret <16 x i8> %tmp2 810} 811 812define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) { 813; CHECK-LABEL: ld1_8h 814; Make sure we are using the operands defined by the ABI 815; CHECK: ld1.h { v0 }[0], [x0] 816; CHECK-NEXT: ret 817 %tmp1 = load i16, i16* %bar 818 %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0 819 ret <8 x i16> %tmp2 820} 821 822define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) { 823; CHECK-LABEL: ld1_4s 824; Make sure we are using the operands defined by the ABI 825; CHECK: ld1.s { v0 }[0], [x0] 826; CHECK-NEXT: ret 827 %tmp1 = load i32, i32* %bar 828 %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0 829 ret <4 x i32> %tmp2 830} 831 832define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) 
{ 833; CHECK-LABEL: ld1_4s_float: 834; Make sure we are using the operands defined by the ABI 835; CHECK: ld1.s { v0 }[0], [x0] 836; CHECK-NEXT: ret 837 %tmp1 = load float, float* %bar 838 %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0 839 ret <4 x float> %tmp2 840} 841 842define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) { 843; CHECK-LABEL: ld1_2d 844; Make sure we are using the operands defined by the ABI 845; CHECK: ld1.d { v0 }[0], [x0] 846; CHECK-NEXT: ret 847 %tmp1 = load i64, i64* %bar 848 %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0 849 ret <2 x i64> %tmp2 850} 851 852define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) { 853; CHECK-LABEL: ld1_2d_double: 854; Make sure we are using the operands defined by the ABI 855; CHECK: ld1.d { v0 }[0], [x0] 856; CHECK-NEXT: ret 857 %tmp1 = load double, double* %bar 858 %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0 859 ret <2 x double> %tmp2 860} 861 862define <1 x i64> @ld1_1d(<1 x i64>* %p) { 863; CHECK-LABEL: ld1_1d 864; Make sure we are using the operands defined by the ABI 865; CHECK: ldr [[REG:d[0-9]+]], [x0] 866; CHECK-NEXT: ret 867 %tmp = load <1 x i64>, <1 x i64>* %p, align 8 868 ret <1 x i64> %tmp 869} 870 871define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) { 872; CHECK-LABEL: ld1_8b 873; Make sure we are using the operands defined by the ABI 874; CHECK: ld1.b { v0 }[0], [x0] 875; CHECK-NEXT: ret 876 %tmp1 = load i8, i8* %bar 877 %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0 878 ret <8 x i8> %tmp2 879} 880 881define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) { 882; CHECK-LABEL: ld1_4h 883; Make sure we are using the operands defined by the ABI 884; CHECK: ld1.h { v0 }[0], [x0] 885; CHECK-NEXT: ret 886 %tmp1 = load i16, i16* %bar 887 %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0 888 ret <4 x i16> %tmp2 889} 890 891define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) { 892; CHECK-LABEL: ld1_2s: 893; Make sure we are using the operands defined by the ABI 894; 
CHECK: ld1.s { v0 }[0], [x0] 895; CHECK-NEXT: ret 896 %tmp1 = load i32, i32* %bar 897 %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0 898 ret <2 x i32> %tmp2 899} 900 901define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) { 902; CHECK-LABEL: ld1_2s_float: 903; Make sure we are using the operands defined by the ABI 904; CHECK: ld1.s { v0 }[0], [x0] 905; CHECK-NEXT: ret 906 %tmp1 = load float, float* %bar 907 %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0 908 ret <2 x float> %tmp2 909} 910 911 912; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s 913define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp { 914entry: 915; CHECK: ld1r_2s_from_dup 916; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0] 917; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1] 918; CHECK-NEXT: ushll.8h [[ARG1]], [[ARG1]], #0 919; CHECK-NEXT: ushll.8h [[ARG2]], [[ARG2]], #0 920; CHECK-NEXT: sub.4h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]] 921; CHECK-NEXT: str d[[RESREGNUM]], [x2] 922; CHECK-NEXT: ret 923 %tmp = bitcast i8* %a to i32* 924 %tmp1 = load i32, i32* %tmp, align 4 925 %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 926 %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer 927 %tmp3 = bitcast <2 x i32> %lane to <8 x i8> 928 %tmp4 = bitcast i8* %b to i32* 929 %tmp5 = load i32, i32* %tmp4, align 4 930 %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0 931 %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer 932 %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8> 933 %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16> 934 %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16> 935 %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i 936 %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64> 937 %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer 938 %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16> 939 %tmp10 = bitcast i16* %diff to <4 x 
i16>* 940 store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8 941 ret void 942} 943 944; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal 945define <4 x float> @ld1r_4s_float(float* nocapture %x) { 946entry: 947; CHECK-LABEL: ld1r_4s_float 948; Make sure we are using the operands defined by the ABI 949; CHECK: ld1r.4s { v0 }, [x0] 950; CHECK-NEXT: ret 951 %tmp = load float, float* %x, align 4 952 %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0 953 %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1 954 %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2 955 %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3 956 ret <4 x float> %tmp4 957} 958 959define <2 x float> @ld1r_2s_float(float* nocapture %x) { 960entry: 961; CHECK-LABEL: ld1r_2s_float 962; Make sure we are using the operands defined by the ABI 963; CHECK: ld1r.2s { v0 }, [x0] 964; CHECK-NEXT: ret 965 %tmp = load float, float* %x, align 4 966 %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0 967 %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1 968 ret <2 x float> %tmp2 969} 970 971define <2 x double> @ld1r_2d_double(double* nocapture %x) { 972entry: 973; CHECK-LABEL: ld1r_2d_double 974; Make sure we are using the operands defined by the ABI 975; CHECK: ld1r.2d { v0 }, [x0] 976; CHECK-NEXT: ret 977 %tmp = load double, double* %x, align 4 978 %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0 979 %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1 980 ret <2 x double> %tmp2 981} 982 983define <1 x double> @ld1r_1d_double(double* nocapture %x) { 984entry: 985; CHECK-LABEL: ld1r_1d_double 986; Make sure we are using the operands defined by the ABI 987; CHECK: ldr d0, [x0] 988; CHECK-NEXT: ret 989 %tmp = load double, double* %x, align 4 990 %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0 991 ret <1 x double> %tmp1 992} 993 994define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) { 995entry: 996; 
CHECK-LABEL: ld1r_4s_float_shuff 997; Make sure we are using the operands defined by the ABI 998; CHECK: ld1r.4s { v0 }, [x0] 999; CHECK-NEXT: ret 1000 %tmp = load float, float* %x, align 4 1001 %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0 1002 %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer 1003 ret <4 x float> %lane 1004} 1005 1006define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) { 1007entry: 1008; CHECK-LABEL: ld1r_2s_float_shuff 1009; Make sure we are using the operands defined by the ABI 1010; CHECK: ld1r.2s { v0 }, [x0] 1011; CHECK-NEXT: ret 1012 %tmp = load float, float* %x, align 4 1013 %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0 1014 %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer 1015 ret <2 x float> %lane 1016} 1017 1018define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) { 1019entry: 1020; CHECK-LABEL: ld1r_2d_double_shuff 1021; Make sure we are using the operands defined by the ABI 1022; CHECK: ld1r.2d { v0 }, [x0] 1023; CHECK-NEXT: ret 1024 %tmp = load double, double* %x, align 4 1025 %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0 1026 %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer 1027 ret <2 x double> %lane 1028} 1029 1030define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) { 1031entry: 1032; CHECK-LABEL: ld1r_1d_double_shuff 1033; Make sure we are using the operands defined by the ABI 1034; CHECK: ldr d0, [x0] 1035; CHECK-NEXT: ret 1036 %tmp = load double, double* %x, align 4 1037 %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0 1038 %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer 1039 ret <1 x double> %lane 1040} 1041 1042%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } 1043%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } 1044%struct.__neon_float32x2x4_t = type { 
<2 x float>, <2 x float>, <2 x float>, <2 x float> } 1045 1046declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly 1047declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly 1048declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly 1049declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly 1050declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly 1051declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly 1052 1053define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) { 1054; CHECK-LABEL: ld1_x2_v8i8: 1055; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1056 %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr) 1057 ret %struct.__neon_int8x8x2_t %val 1058} 1059 1060define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) { 1061; CHECK-LABEL: ld1_x2_v4i16: 1062; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1063 %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr) 1064 ret %struct.__neon_int16x4x2_t %val 1065} 1066 1067define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) { 1068; CHECK-LABEL: ld1_x2_v2i32: 1069; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1070 %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr) 1071 ret %struct.__neon_int32x2x2_t %val 1072} 1073 1074define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) { 1075; CHECK-LABEL: ld1_x2_v2f32: 1076; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1077 %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr) 1078 ret %struct.__neon_float32x2x2_t %val 1079} 1080 1081define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) { 1082; CHECK-LABEL: ld1_x2_v1i64: 1083; CHECK: ld1.1d { {{v[0-9]+}}, 
{{v[0-9]+}} }, [x0] 1084 %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr) 1085 ret %struct.__neon_int64x1x2_t %val 1086} 1087 1088define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) { 1089; CHECK-LABEL: ld1_x2_v1f64: 1090; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1091 %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr) 1092 ret %struct.__neon_float64x1x2_t %val 1093} 1094 1095 1096%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } 1097%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } 1098%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } 1099 1100%struct.__neon_float64x2x2_t = type { <2 x double>, <2 x double> } 1101%struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> } 1102%struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> } 1103 1104declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly 1105declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly 1106declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly 1107declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly 1108declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly 1109declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly 1110 1111define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) { 1112; CHECK-LABEL: ld1_x2_v16i8: 1113; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1114 %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr) 1115 ret %struct.__neon_int8x16x2_t %val 1116} 1117 1118define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) { 1119; CHECK-LABEL: ld1_x2_v8i16: 
1120; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1121 %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr) 1122 ret %struct.__neon_int16x8x2_t %val 1123} 1124 1125define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) { 1126; CHECK-LABEL: ld1_x2_v4i32: 1127; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1128 %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr) 1129 ret %struct.__neon_int32x4x2_t %val 1130} 1131 1132define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) { 1133; CHECK-LABEL: ld1_x2_v4f32: 1134; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1135 %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr) 1136 ret %struct.__neon_float32x4x2_t %val 1137} 1138 1139define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) { 1140; CHECK-LABEL: ld1_x2_v2i64: 1141; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1142 %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr) 1143 ret %struct.__neon_int64x2x2_t %val 1144} 1145 1146define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) { 1147; CHECK-LABEL: ld1_x2_v2f64: 1148; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1149 %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr) 1150 ret %struct.__neon_float64x2x2_t %val 1151} 1152 1153declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly 1154declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly 1155declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly 1156declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly 1157declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly 1158declare %struct.__neon_float64x1x3_t 
@llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly 1159 1160define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) { 1161; CHECK-LABEL: ld1_x3_v8i8: 1162; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1163 %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr) 1164 ret %struct.__neon_int8x8x3_t %val 1165} 1166 1167define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) { 1168; CHECK-LABEL: ld1_x3_v4i16: 1169; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1170 %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr) 1171 ret %struct.__neon_int16x4x3_t %val 1172} 1173 1174define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) { 1175; CHECK-LABEL: ld1_x3_v2i32: 1176; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1177 %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr) 1178 ret %struct.__neon_int32x2x3_t %val 1179} 1180 1181define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) { 1182; CHECK-LABEL: ld1_x3_v2f32: 1183; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1184 %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr) 1185 ret %struct.__neon_float32x2x3_t %val 1186} 1187 1188define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) { 1189; CHECK-LABEL: ld1_x3_v1i64: 1190; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1191 %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr) 1192 ret %struct.__neon_int64x1x3_t %val 1193} 1194 1195define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) { 1196; CHECK-LABEL: ld1_x3_v1f64: 1197; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1198 %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr) 1199 ret %struct.__neon_float64x1x3_t %val 1200} 1201 1202declare %struct.__neon_int8x16x3_t 
@llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly 1203declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly 1204declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly 1205declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly 1206declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly 1207declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly 1208 1209define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) { 1210; CHECK-LABEL: ld1_x3_v16i8: 1211; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1212 %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr) 1213 ret %struct.__neon_int8x16x3_t %val 1214} 1215 1216define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) { 1217; CHECK-LABEL: ld1_x3_v8i16: 1218; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1219 %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr) 1220 ret %struct.__neon_int16x8x3_t %val 1221} 1222 1223define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) { 1224; CHECK-LABEL: ld1_x3_v4i32: 1225; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1226 %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr) 1227 ret %struct.__neon_int32x4x3_t %val 1228} 1229 1230define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) { 1231; CHECK-LABEL: ld1_x3_v4f32: 1232; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1233 %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr) 1234 ret %struct.__neon_float32x4x3_t %val 1235} 1236 1237define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) { 1238; CHECK-LABEL: ld1_x3_v2i64: 1239; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 
1240 %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr) 1241 ret %struct.__neon_int64x2x3_t %val 1242} 1243 1244define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) { 1245; CHECK-LABEL: ld1_x3_v2f64: 1246; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1247 %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr) 1248 ret %struct.__neon_float64x2x3_t %val 1249} 1250 1251declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly 1252declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly 1253declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly 1254declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly 1255declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly 1256declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly 1257 1258define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) { 1259; CHECK-LABEL: ld1_x4_v8i8: 1260; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1261 %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr) 1262 ret %struct.__neon_int8x8x4_t %val 1263} 1264 1265define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) { 1266; CHECK-LABEL: ld1_x4_v4i16: 1267; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1268 %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr) 1269 ret %struct.__neon_int16x4x4_t %val 1270} 1271 1272define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) { 1273; CHECK-LABEL: ld1_x4_v2i32: 1274; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1275 %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr) 1276 
ret %struct.__neon_int32x2x4_t %val 1277} 1278 1279define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) { 1280; CHECK-LABEL: ld1_x4_v2f32: 1281; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1282 %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr) 1283 ret %struct.__neon_float32x2x4_t %val 1284} 1285 1286define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) { 1287; CHECK-LABEL: ld1_x4_v1i64: 1288; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1289 %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr) 1290 ret %struct.__neon_int64x1x4_t %val 1291} 1292 1293define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) { 1294; CHECK-LABEL: ld1_x4_v1f64: 1295; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1296 %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr) 1297 ret %struct.__neon_float64x1x4_t %val 1298} 1299 1300declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly 1301declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly 1302declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly 1303declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly 1304declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly 1305declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly 1306 1307define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) { 1308; CHECK-LABEL: ld1_x4_v16i8: 1309; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1310 %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr) 1311 ret %struct.__neon_int8x16x4_t %val 1312} 1313 1314define 
%struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) { 1315; CHECK-LABEL: ld1_x4_v8i16: 1316; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1317 %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr) 1318 ret %struct.__neon_int16x8x4_t %val 1319} 1320 1321define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) { 1322; CHECK-LABEL: ld1_x4_v4i32: 1323; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1324 %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr) 1325 ret %struct.__neon_int32x4x4_t %val 1326} 1327 1328define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) { 1329; CHECK-LABEL: ld1_x4_v4f32: 1330; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1331 %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr) 1332 ret %struct.__neon_float32x4x4_t %val 1333} 1334 1335define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) { 1336; CHECK-LABEL: ld1_x4_v2i64: 1337; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1338 %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr) 1339 ret %struct.__neon_int64x2x4_t %val 1340} 1341 1342define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) { 1343; CHECK-LABEL: ld1_x4_v2f64: 1344; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1345 %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr) 1346 ret %struct.__neon_float64x2x4_t %val 1347} 1348