; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -opaque-pointers %s -o - | FileCheck %s

; i32

define arm_aapcs_vfpcc <2 x i32> @ptr_v2i32(<2 x i32*>* %offptr) {
; CHECK-LABEL: ptr_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrd r1, r0, [r0]
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    ldr r1, [r1]
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <2 x i32*>, <2 x i32*>* %offptr, align 4
  %gather = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %offs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
  ret <2 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @ptr_v4i32(<4 x i32*>* %offptr) {
; CHECK-LABEL: ptr_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32*>, <4 x i32*>* %offptr, align 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %offs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <8 x i32> @ptr_v8i32(<8 x i32*>* %offptr) {
; CHECK-LABEL: ptr_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    vmov r3, r12, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r0, lr, d1
; CHECK-NEXT:    ldr r7, [r2]
; CHECK-NEXT:    vmov r2, r4, d0
; CHECK-NEXT:    ldr r6, [r1]
; CHECK-NEXT:    ldr r3, [r3]
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    ldr.w r1, [r12]
; CHECK-NEXT:    vmov q1[2], q1[0], r3, r6
; CHECK-NEXT:    ldr.w r5, [lr]
; CHECK-NEXT:    vmov q1[3], q1[1], r1, r7
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r4, [r4]
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r4, r5
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i32*>, <8 x i32*>* %offptr, align 4
  %gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %gather
}

define arm_aapcs_vfpcc <16 x i32> @ptr_v16i32(<16 x i32*>* %offptr) {
; CHECK-LABEL: ptr_v16i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #48]
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r4, r5, d1
; CHECK-NEXT:    ldr r7, [r2]
; CHECK-NEXT:    vmov r2, r6, d0
; CHECK-NEXT:    ldr.w r12, [r1]
; CHECK-NEXT:    ldr r3, [r3]
; CHECK-NEXT:    ldr r4, [r4]
; CHECK-NEXT:    ldr r5, [r5]
; CHECK-NEXT:    vmov q3[2], q3[0], r3, r12
; CHECK-NEXT:    ldr.w r1, [lr]
; CHECK-NEXT:    vmov q3[3], q3[1], r1, r7
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r6, [r6]
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r4
; CHECK-NEXT:    vmov r2, r4, d3
; CHECK-NEXT:    vmov q0[3], q0[1], r6, r5
; CHECK-NEXT:    vmov r6, r5, d2
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r6, [r6]
; CHECK-NEXT:    ldr r5, [r5]
; CHECK-NEXT:    vmov q1[2], q1[0], r6, r2
; CHECK-NEXT:    ldr r6, [r4]
; CHECK-NEXT:    vmov r0, r2, d5
; CHECK-NEXT:    vmov q1[3], q1[1], r5, r6
; CHECK-NEXT:    vmov r6, r5, d4
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    ldr r6, [r6]
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r5, [r5]
; CHECK-NEXT:    vmov q2[2], q2[0], r6, r0
; CHECK-NEXT:    vmov q2[3], q2[1], r5, r2
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <16 x i32*>, <16 x i32*>* %offptr, align 4
  %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %offs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
  ret <16 x i32> %gather
}

; f32

define arm_aapcs_vfpcc <2 x float> @ptr_v2f32(<2 x float*>* %offptr) {
; CHECK-LABEL: ptr_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrd r1, r0, [r0]
; CHECK-NEXT:    vldr s1, [r0]
; CHECK-NEXT:    vldr s0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <2 x float*>, <2 x float*>* %offptr, align 4
  %gather = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %offs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
  ret <2 x float> %gather
}

define arm_aapcs_vfpcc <4 x float> @ptr_v4f32(<4 x float*>* %offptr) {
; CHECK-LABEL: ptr_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x float*>, <4 x float*>* %offptr, align 4
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %offs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <8 x float> @ptr_v8f32(<8 x float*>* %offptr) {
; CHECK-LABEL: ptr_v8f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r12, r2, d1
; CHECK-NEXT:    vmov lr, r1, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r0, r3, d1
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vldr s3, [r2]
; CHECK-NEXT:    vldr s2, [r12]
; CHECK-NEXT:    vldr s1, [r1]
; CHECK-NEXT:    vldr s0, [lr]
; CHECK-NEXT:    vldr s7, [r3]
; CHECK-NEXT:    vldr s6, [r0]
; CHECK-NEXT:    vldr s5, [r5]
; CHECK-NEXT:    vldr s4, [r4]
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x float*>, <8 x float*>* %offptr, align 4
  %gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
  ret <8 x float> %gather
}

; i16

define arm_aapcs_vfpcc <8 x i16> @ptr_i16(<8 x i16*>* %offptr) {
; CHECK-LABEL: ptr_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, lr, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r6, [r3]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh.w r3, [lr]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    vmov.16 q0[3], r3
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r6
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_sext(<2 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v2i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrd r1, r0, [r0]
; CHECK-NEXT:    ldrsh.w r0, [r0]
; CHECK-NEXT:    ldrsh.w r1, [r1]
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4
  %gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> %offs, i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
  %ext = sext <2 x i16> %gather to <2 x i32>
  ret <2 x i32> %ext
}

define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(<2 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v2i16_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrd r1, r0, [r0]
; CHECK-NEXT:    vmov.i64 q0, #0xffff
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <2 x i16*>, <2 x i16*>* %offptr, align 4
  %gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> %offs, i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
  %ext = zext <2 x i16> %gather to <2 x i32>
  ret <2 x i32> %ext
}

define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_sext(<4 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v4i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vldrh.s32 q0, [r1, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16*>, <4 x i16*>* %offptr, align 4
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %ext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %ext
}

define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_zext(<4 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v4i16_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vldrh.u32 q0, [r1, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16*>, <4 x i16*>* %offptr, align 4
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %ext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %ext
}

define arm_aapcs_vfpcc <4 x i16> @ptr_v4i16(<4 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vldrh.u32 q0, [r1, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16*>, <4 x i16*>* %offptr, align 4
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  ret <4 x i16> %gather
}

define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_sext(<8 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r3, r1, d1
; CHECK-NEXT:    vmov r12, r2, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov lr, r0, d1
; CHECK-NEXT:    ldrh r7, [r1]
; CHECK-NEXT:    ldrh.w r1, [r12]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r4, [r0]
; CHECK-NEXT:    vmov r0, r5, d0
; CHECK-NEXT:    ldrh.w r6, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r6
; CHECK-NEXT:    vmov.16 q0[3], r4
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r7
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  %ext = sext <8 x i16> %gather to <8 x i32>
  ret <8 x i32> %ext
}

define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_zext(<8 x i16*>* %offptr) {
; CHECK-LABEL: ptr_v8i16_zext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r3, r1, d1
; CHECK-NEXT:    vmov r12, r2, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov lr, r0, d1
; CHECK-NEXT:    ldrh r7, [r1]
; CHECK-NEXT:    ldrh.w r1, [r12]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r4, [r0]
; CHECK-NEXT:    vmov r0, r5, d0
; CHECK-NEXT:    ldrh.w r6, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r6
; CHECK-NEXT:    vmov.16 q0[3], r4
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r7
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i16*>, <8 x i16*>* %offptr, align 4
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  %ext = zext <8 x i16> %gather to <8 x i32>
  ret <8 x i32> %ext
}

; f16

define arm_aapcs_vfpcc <8 x half> @ptr_f16(<8 x half*>* %offptr) {
; CHECK-LABEL: ptr_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldr.16 s1, [r1]
; CHECK-NEXT:    vldr.16 s2, [r2]
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vins.f16 s1, s2
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vmov r0, r1, d3
; CHECK-NEXT:    vldr.16 s3, [r0]
; CHECK-NEXT:    vins.f16 s2, s4
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vins.f16 s3, s4
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x half*>, <8 x half*>* %offptr, align 4
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

define arm_aapcs_vfpcc <4 x half> @ptr_v4f16(<4 x half*>* %offptr) {
; CHECK-LABEL: ptr_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vldr.16 s1, [r0]
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vins.f16 s1, s2
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x half*>, <4 x half*>* %offptr, align 4
  %gather = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x half> undef)
  ret <4 x half> %gather
}

; i8

define arm_aapcs_vfpcc <16 x i8> @ptr_i8(<16 x i8*>* %offptr) {
; CHECK-LABEL: ptr_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #48]
; CHECK-NEXT:    vldrw.u32 q2, [r0]
; CHECK-NEXT:    vldrw.u32 q1, [r0, #32]
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    vmov r6, r7, d4
; CHECK-NEXT:    vmov r4, r3, d1
; CHECK-NEXT:    ldrb r5, [r1]
; CHECK-NEXT:    ldrb r1, [r2]
; CHECK-NEXT:    ldrb r2, [r6]
; CHECK-NEXT:    ldrb.w r12, [r3]
; CHECK-NEXT:    vmov.8 q0[0], r2
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    ldrb.w lr, [r4]
; CHECK-NEXT:    ldrb r4, [r2]
; CHECK-NEXT:    ldrb r2, [r3]
; CHECK-NEXT:    ldrb r3, [r7]
; CHECK-NEXT:    vmov.8 q0[1], r3
; CHECK-NEXT:    vmov r3, r6, d5
; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    vmov.8 q0[2], r3
; CHECK-NEXT:    vmov r0, r3, d4
; CHECK-NEXT:    vmov.8 q0[3], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.8 q0[5], r3
; CHECK-NEXT:    vmov r0, r3, d5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.8 q0[7], r3
; CHECK-NEXT:    vmov r0, r3, d2
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.8 q0[9], r3
; CHECK-NEXT:    vmov.8 q0[10], r4
; CHECK-NEXT:    vmov.8 q0[11], r2
; CHECK-NEXT:    vmov.8 q0[12], r5
; CHECK-NEXT:    vmov.8 q0[13], r1
; CHECK-NEXT:    vmov.8 q0[14], lr
; CHECK-NEXT:    vmov.8 q0[15], r12
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <16 x i8*>, <16 x i8*>* %offptr, align 4
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %offs, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}

define arm_aapcs_vfpcc <8 x i16> @ptr_v8i8_sext16(<8 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v8i8_sext16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r3, r1, d1
; CHECK-NEXT:    vmov r12, r2, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov lr, r0, d1
; CHECK-NEXT:    ldrb r7, [r1]
; CHECK-NEXT:    ldrb.w r1, [r12]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrb.w r6, [lr]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.16 q0[2], r6
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r7
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %ext = sext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %ext
}

define arm_aapcs_vfpcc <8 x i16> @ptr_v8i8_zext16(<8 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v8i8_zext16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r3, r1, d1
; CHECK-NEXT:    vmov r12, r2, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov lr, r0, d1
; CHECK-NEXT:    ldrb r7, [r1]
; CHECK-NEXT:    ldrb.w r1, [r12]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrb.w r6, [lr]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.16 q0[2], r6
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.16 q0[3], r0
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r7
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %ext = zext <8 x i8> %gather to <8 x i16>
  ret <8 x i16> %ext
}

define arm_aapcs_vfpcc <8 x i8> @ptr_v8i8(<8 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r1, r2, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, lr, d1
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r6, [r3]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrb.w r3, [lr]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    ldrb.w r12, [r12]
; CHECK-NEXT:    vmov.16 q0[3], r3
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r6
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  ret <8 x i8> %gather
}

define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_sext32(<4 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v4i8_sext32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vldrb.s32 q0, [r1, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8*>, <4 x i8*>* %offptr, align 4
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %offs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %ext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %ext
}

define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_zext32(<4 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v4i8_zext32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vldrb.u32 q0, [r1, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8*>, <4 x i8*>* %offptr, align 4
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %offs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %ext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %ext
}

define arm_aapcs_vfpcc <4 x i8> @ptr_v4i8(<4 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    vldrb.u32 q0, [r1, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8*>, <4 x i8*>* %offptr, align 4
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %offs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  ret <4 x i8> %gather
}

define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_sext32(<8 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v8i8_sext32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    vmov r3, r12, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r0, lr, d1
; CHECK-NEXT:    ldrb r7, [r2]
; CHECK-NEXT:    vmov r2, r4, d0
; CHECK-NEXT:    ldrb r6, [r1]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb.w r1, [r12]
; CHECK-NEXT:    vmov q1[2], q1[0], r3, r6
; CHECK-NEXT:    ldrb.w r5, [lr]
; CHECK-NEXT:    vmov q1[3], q1[1], r1, r7
; CHECK-NEXT:    vmovlb.s8 q1, q1
; CHECK-NEXT:    vmovlb.s16 q1, q1
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r4, r5
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %ext = sext <8 x i8> %gather to <8 x i32>
  ret <8 x i32> %ext
}

define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_zext32(<8 x i8*>* %offptr) {
; CHECK-LABEL: ptr_v8i8_zext32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vmov.i32 q1, #0xff
; CHECK-NEXT:    vmov r1, r2, d1
; CHECK-NEXT:    vmov r12, r3, d0
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, lr, d1
; CHECK-NEXT:    ldrb r7, [r2]
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb.w r2, [r12]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov q2[2], q2[0], r2, r1
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb.w r6, [lr]
; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov q2[3], q2[1], r3, r7
; CHECK-NEXT:    vmov q0[3], q0[1], r5, r6
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vand q1, q2, q1
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <8 x i8*>, <8 x i8*>* %offptr, align 4
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %ext = zext <8 x i8> %gather to <8 x i32>
  ret <8 x i32> %ext
}

; loops

define void @foo_ptr_p_int32_t(i32* %dest, i32** %src, i32 %n) {
; CHECK-LABEL: foo_ptr_p_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    bic r2, r2, #15
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB26_1: @ %vector.body.preheader
; CHECK-NEXT:    subs r2, #4
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    add.w lr, r3, r2, lsr #2
; CHECK-NEXT:  .LBB26_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r1], #16
; CHECK-NEXT:    vptt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q1, [q0]
; CHECK-NEXT:    vstrwt.32 q1, [r0], #16
; CHECK-NEXT:    le lr, .LBB26_2
; CHECK-NEXT:  @ %bb.3: @ %for.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %and = and i32 %n, -16
  %cmp11 = icmp sgt i32 %and, 0
  br i1 %cmp11, label %vector.body, label %for.end

vector.body:                                      ; preds = %entry, %vector.body
  %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ]
  %0 = getelementptr inbounds i32*, i32** %src, i32 %index
  %1 = bitcast i32** %0 to <4 x i32*>*
  %wide.load = load <4 x i32*>, <4 x i32*>* %1, align 4
  %2 = icmp ne <4 x i32*> %wide.load, zeroinitializer
  %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %wide.load, i32 4, <4 x i1> %2, <4 x i32> undef)
  %3 = getelementptr inbounds i32, i32* %dest, i32 %index
  %4 = bitcast i32* %3 to <4 x i32>*
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.gather, <4 x i32>* %4, i32 4, <4 x i1> %2)
  %index.next = add i32 %index, 4
  %5 = icmp eq i32 %index.next, %and
  br i1 %5, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body, %entry
  ret void
}

define void @foo_ptr_p_float(float* %dest, float** %src, i32 %n) {
; CHECK-LABEL: foo_ptr_p_float:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    bic r2, r2, #15
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB27_1: @ %vector.body.preheader
; CHECK-NEXT:    subs r2, #4
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    add.w lr, r3, r2, lsr #2
; CHECK-NEXT:  .LBB27_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r1], #16
; CHECK-NEXT:    vptt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q1, [q0]
; CHECK-NEXT:    vstrwt.32 q1, [r0], #16
; CHECK-NEXT:    le lr, .LBB27_2
; CHECK-NEXT:  @ %bb.3: @ %for.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %and = and i32 %n, -16
  %cmp11 = icmp sgt i32 %and, 0
  br i1 %cmp11, label %vector.body, label %for.end

vector.body:                                      ; preds = %entry, %vector.body
  %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ]
  %0 = getelementptr inbounds float*, float** %src, i32 %index
  %1 = bitcast float** %0 to <4 x float*>*
  %wide.load = load <4 x float*>, <4 x float*>* %1, align 4
  %2 = icmp ne <4 x float*> %wide.load, zeroinitializer
  %3 = bitcast <4 x float*> %wide.load to <4 x i32*>
  %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> %2, <4 x i32> undef)
  %4 = getelementptr inbounds float, float* %dest, i32 %index
  %5 = bitcast float* %4 to <4 x i32>*
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.gather, <4 x i32>* %5, i32 4, <4 x i1> %2)
  %index.next = add i32 %index, 4
  %6 = icmp eq i32 %index.next, %and
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body, %entry
  ret void
}

define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x i32*> %p) {
; CHECK-LABEL: qi4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q1, #0x10
; CHECK-NEXT:    vadd.i32 q1, q0, q1
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x i32*> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %g, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <8 x i32> @sext_unsigned_unscaled_i8_i8_toi64(i8* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8_toi64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov.u16 r0, q1[2]
; CHECK-NEXT:    vmov.u16 r1, q1[0]
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q1[3]
; CHECK-NEXT:    vmov.u16 r1, q1[1]
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q1[6]
; CHECK-NEXT:    vmov.u16 r1, q1[4]
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmov q2[2], q2[0], r1, r0
; CHECK-NEXT:    vmov.u16 r0, q1[7]
; CHECK-NEXT:    vmov.u16 r1, q1[5]
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vmov q2[3], q2[1], r1, r0
; CHECK-NEXT:    vmovlb.s8 q1, q2
; CHECK-NEXT:    vmovlb.s16 q1, q1
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  %gather.sext = sext <8 x i8> %gather to <8 x i32>
  ret <8 x i32> %gather.sext
}

declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)
declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
declare <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*>, i32, <2 x i1>, <2 x i16>)
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>)
declare <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*>, i32, <16 x i1>, <16 x i16>)
declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
declare <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*>, i32, <8 x i1>, <8 x half>)
declare <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*>, i32, <16 x i1>, <16 x half>)
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)