; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(half* %base) {
; CHECK-LABEL: test_vld1q_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <4 x float> @test_vld1q_f32(float* %base) {
; CHECK-LABEL: test_vld1q_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_s8(i8* %base) {
; CHECK-LABEL: test_vld1q_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_s16(i16* %base) {
; CHECK-LABEL: test_vld1q_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_s32(i32* %base) {
; CHECK-LABEL: test_vld1q_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_u8(i8* %base) {
; CHECK-LABEL: test_vld1q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_u16(i16* %base) {
; CHECK-LABEL: test_vld1q_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_u32(i32* %base) {
; CHECK-LABEL: test_vld1q_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <8 x half> @test_vld1q_z_f16(half* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %2, <8 x half> zeroinitializer)
  ret <8 x half> %3
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32 immarg, <8 x i1>, <8 x half>)

define arm_aapcs_vfpcc <4 x float> @test_vld1q_z_f32(float* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %2, <4 x float> zeroinitializer)
  ret <4 x float> %3
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_s8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_s16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_s32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_u8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_u16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_u32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_s8(i8* %base) {
; CHECK-LABEL: test_vldrbq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_s16(i8* %base) {
; CHECK-LABEL: test_vldrbq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_s32(i8* %base) {
; CHECK-LABEL: test_vldrbq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_u8(i8* %base) {
; CHECK-LABEL: test_vldrbq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_u16(i8* %base) {
; CHECK-LABEL: test_vldrbq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_u32(i8* %base) {
; CHECK-LABEL: test_vldrbq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_s8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_s16(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %2, <8 x i8> zeroinitializer)
  %4 = sext <8 x i8> %3 to <8 x i16>
  ret <8 x i16> %4
}

declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_s32(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %2, <4 x i8> zeroinitializer)
  %4 = sext <4 x i8> %3 to <4 x i32>
  ret <4 x i32> %4
}

declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_u8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_u16(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %2, <8 x i8> zeroinitializer)
  %4 = zext <8 x i8> %3 to <8 x i16>
  ret <8 x i16> %4
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_u32(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %2, <4 x i8> zeroinitializer)
  %4 = zext <4 x i8> %3 to <4 x i32>
  ret <4 x i32> %4
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_f16(half* %base) {
; CHECK-LABEL: test_vldrhq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_s16(i16* %base) {
; CHECK-LABEL: test_vldrhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_s32(i16* %base) {
; CHECK-LABEL: test_vldrhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_u16(i16* %base) {
; CHECK-LABEL: test_vldrhq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_u32(i16* %base) {
; CHECK-LABEL: test_vldrhq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_z_f16(half* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %2, <8 x half> zeroinitializer)
  ret <8 x half> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_s16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_s32(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %2, <4 x i16> zeroinitializer)
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_u16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_u32(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %2, <4 x i16> zeroinitializer)
  %4 = zext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_f32(float* %base) {
; CHECK-LABEL: test_vldrwq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_s32(i32* %base) {
; CHECK-LABEL: test_vldrwq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_u32(i32* %base) {
; CHECK-LABEL: test_vldrwq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_z_f32(float* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %2, <4 x float> zeroinitializer)
  ret <4 x float> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_s32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_u32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc void @test_vst1q_f16(half* %base, <8 x half> %value) {
; CHECK-LABEL: test_vst1q_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  store <8 x half> %value, <8 x half>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_f32(float* %base, <4 x float> %value) {
; CHECK-LABEL: test_vst1q_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  store <4 x float> %value, <4 x float>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_s8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vst1q_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_s16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vst1q_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_s32(i32* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vst1q_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_u8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vst1q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_u16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vst1q_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_u32(i32* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vst1q_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_p_f16(half* %base, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %value, <8 x half>* %0, i32 2, <8 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32 immarg, <8 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_f32(float* %base, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %value, <4 x float>* %0, i32 4, <4 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_s8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_s16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_s32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_u8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_p_u16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_p_u32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_s8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_s16(i8* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  store <8 x i8> %0, <8 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_s32(i8* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  store <4 x i8> %0, <4 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_u8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_u16(i8* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  store <8 x i8> %0, <8 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_u32(i8* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  store <4 x i8> %0, <4 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_s8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_s16(i8* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %0, <8 x i8>* %1, i32 1, <8 x i1> %3)
  ret void
}

declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_p_s32(i8* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %0, <4 x i8>* %1, i32 1, <4 x i1> %3)
  ret void
}

declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_p_u8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_u16(i8* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %0, <8 x i8>* %1, i32 1, <8 x i1> %3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_u32(i8* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %0, <4 x i8>* %1, i32 1, <4 x i1> %3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_f16(half* %base, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  store <8 x half> %value, <8 x half>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_s16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_s32(i16* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = bitcast i16* %base to <4 x i16>*
  store <4 x i16> %0, <4 x i16>* %1, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_u16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_u32(i16* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = bitcast i16* %base to <4 x i16>*
  store <4 x i16> %0, <4 x i16>* %1, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_p_f16(half* %base, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void
@llvm.masked.store.v8f16.p0v8f16(<8 x half> %value, <8 x half>* %0, i32 2, <8 x i1> %2) 1065 ret void 1066} 1067 1068define arm_aapcs_vfpcc void @test_vstrhq_p_s16(i16* %base, <8 x i16> %value, i16 zeroext %p) { 1069; CHECK-LABEL: test_vstrhq_p_s16: 1070; CHECK: @ %bb.0: @ %entry 1071; CHECK-NEXT: vmsr p0, r1 1072; CHECK-NEXT: vpst 1073; CHECK-NEXT: vstrht.16 q0, [r0] 1074; CHECK-NEXT: bx lr 1075entry: 1076 %0 = bitcast i16* %base to <8 x i16>* 1077 %1 = zext i16 %p to i32 1078 %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1) 1079 call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2) 1080 ret void 1081} 1082 1083define arm_aapcs_vfpcc void @test_vstrhq_p_s32(i16* %base, <4 x i32> %value, i16 zeroext %p) { 1084; CHECK-LABEL: test_vstrhq_p_s32: 1085; CHECK: @ %bb.0: @ %entry 1086; CHECK-NEXT: vmsr p0, r1 1087; CHECK-NEXT: vpst 1088; CHECK-NEXT: vstrht.32 q0, [r0] 1089; CHECK-NEXT: bx lr 1090entry: 1091 %0 = trunc <4 x i32> %value to <4 x i16> 1092 %1 = bitcast i16* %base to <4 x i16>* 1093 %2 = zext i16 %p to i32 1094 %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) 1095 call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %0, <4 x i16>* %1, i32 2, <4 x i1> %3) 1096 ret void 1097} 1098 1099declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>) 1100 1101define arm_aapcs_vfpcc void @test_vstrhq_p_u16(i16* %base, <8 x i16> %value, i16 zeroext %p) { 1102; CHECK-LABEL: test_vstrhq_p_u16: 1103; CHECK: @ %bb.0: @ %entry 1104; CHECK-NEXT: vmsr p0, r1 1105; CHECK-NEXT: vpst 1106; CHECK-NEXT: vstrht.16 q0, [r0] 1107; CHECK-NEXT: bx lr 1108entry: 1109 %0 = bitcast i16* %base to <8 x i16>* 1110 %1 = zext i16 %p to i32 1111 %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1) 1112 call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2) 1113 ret void 1114} 1115 1116define arm_aapcs_vfpcc void @test_vstrhq_p_u32(i16* %base, <4 x i32> %value, i16 
zeroext %p) { 1117; CHECK-LABEL: test_vstrhq_p_u32: 1118; CHECK: @ %bb.0: @ %entry 1119; CHECK-NEXT: vmsr p0, r1 1120; CHECK-NEXT: vpst 1121; CHECK-NEXT: vstrht.32 q0, [r0] 1122; CHECK-NEXT: bx lr 1123entry: 1124 %0 = trunc <4 x i32> %value to <4 x i16> 1125 %1 = bitcast i16* %base to <4 x i16>* 1126 %2 = zext i16 %p to i32 1127 %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) 1128 call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %0, <4 x i16>* %1, i32 2, <4 x i1> %3) 1129 ret void 1130} 1131 1132define arm_aapcs_vfpcc void @test_vstrwq_f32(float* %base, <4 x float> %value) { 1133; CHECK-LABEL: test_vstrwq_f32: 1134; CHECK: @ %bb.0: @ %entry 1135; CHECK-NEXT: vstrw.32 q0, [r0] 1136; CHECK-NEXT: bx lr 1137entry: 1138 %0 = bitcast float* %base to <4 x float>* 1139 store <4 x float> %value, <4 x float>* %0, align 4 1140 ret void 1141} 1142 1143define arm_aapcs_vfpcc void @test_vstrwq_s32(i32* %base, <4 x i32> %value) { 1144; CHECK-LABEL: test_vstrwq_s32: 1145; CHECK: @ %bb.0: @ %entry 1146; CHECK-NEXT: vstrw.32 q0, [r0] 1147; CHECK-NEXT: bx lr 1148entry: 1149 %0 = bitcast i32* %base to <4 x i32>* 1150 store <4 x i32> %value, <4 x i32>* %0, align 4 1151 ret void 1152} 1153 1154define arm_aapcs_vfpcc void @test_vstrwq_u32(i32* %base, <4 x i32> %value) { 1155; CHECK-LABEL: test_vstrwq_u32: 1156; CHECK: @ %bb.0: @ %entry 1157; CHECK-NEXT: vstrw.32 q0, [r0] 1158; CHECK-NEXT: bx lr 1159entry: 1160 %0 = bitcast i32* %base to <4 x i32>* 1161 store <4 x i32> %value, <4 x i32>* %0, align 4 1162 ret void 1163} 1164 1165define arm_aapcs_vfpcc void @test_vstrwq_p_f32(float* %base, <4 x float> %value, i16 zeroext %p) { 1166; CHECK-LABEL: test_vstrwq_p_f32: 1167; CHECK: @ %bb.0: @ %entry 1168; CHECK-NEXT: vmsr p0, r1 1169; CHECK-NEXT: vpst 1170; CHECK-NEXT: vstrwt.32 q0, [r0] 1171; CHECK-NEXT: bx lr 1172entry: 1173 %0 = bitcast float* %base to <4 x float>* 1174 %1 = zext i16 %p to i32 1175 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1176 call void 
@llvm.masked.store.v4f32.p0v4f32(<4 x float> %value, <4 x float>* %0, i32 4, <4 x i1> %2) 1177 ret void 1178} 1179 1180define arm_aapcs_vfpcc void @test_vstrwq_p_s32(i32* %base, <4 x i32> %value, i16 zeroext %p) { 1181; CHECK-LABEL: test_vstrwq_p_s32: 1182; CHECK: @ %bb.0: @ %entry 1183; CHECK-NEXT: vmsr p0, r1 1184; CHECK-NEXT: vpst 1185; CHECK-NEXT: vstrwt.32 q0, [r0] 1186; CHECK-NEXT: bx lr 1187entry: 1188 %0 = bitcast i32* %base to <4 x i32>* 1189 %1 = zext i16 %p to i32 1190 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1191 call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2) 1192 ret void 1193} 1194 1195define arm_aapcs_vfpcc void @test_vstrwq_p_u32(i32* %base, <4 x i32> %value, i16 zeroext %p) { 1196; CHECK-LABEL: test_vstrwq_p_u32: 1197; CHECK: @ %bb.0: @ %entry 1198; CHECK-NEXT: vmsr p0, r1 1199; CHECK-NEXT: vpst 1200; CHECK-NEXT: vstrwt.32 q0, [r0] 1201; CHECK-NEXT: bx lr 1202entry: 1203 %0 = bitcast i32* %base to <4 x i32>* 1204 %1 = zext i16 %p to i32 1205 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1206 call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2) 1207 ret void 1208} 1209