; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

define arm_aapcs_vfpcc <4 x i32> @zext_scaled_i16_i32(i16* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: zext_scaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @zext_scaled_i16_i32_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_scaled_i16_i32_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_scaled_i16_i32(i16* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: sext_scaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32(i32* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: scaled_i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

; TODO: scaled_f16_i32

define arm_aapcs_vfpcc <4 x float> @scaled_f32_i32(i32* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: scaled_f32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %i32_ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i32*> %i32_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x float> @scaled_f32_i32_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_f32_i32_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x i32> @unsigned_scaled_b_i32_i16(i32* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: unsigned_scaled_b_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.zext
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @signed_scaled_i32_i16(i32* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: signed_scaled_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.sext
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @unsigned_scaled_b_i32_i16_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_scaled_b_i32_i16_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @signed_scaled_i32_i16_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: signed_scaled_i32_i16_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x float> @a_unsigned_scaled_f32_i16(i32* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: a_unsigned_scaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i32*> %i32_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x float> @b_signed_scaled_f32_i16(i32* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: b_signed_scaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i32*> %i32_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x i32> @zext_signed_scaled_i16_i16(i16* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: zext_signed_scaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.sext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_signed_scaled_i16_i16(i16* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: sext_signed_scaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.sext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_scaled_i16_i16(i16* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: zext_unsigned_scaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.zext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_scaled_i16_i16(i16* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: sext_unsigned_scaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.zext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @unsigned_scaled_b_i32_i8(i32* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: unsigned_scaled_b_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.zext
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @signed_scaled_i32_i8(i32* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: signed_scaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.sext
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x float> @a_unsigned_scaled_f32_i8(i32* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: a_unsigned_scaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i32*> %i32_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x float> @b_signed_scaled_f32_i8(i32* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: b_signed_scaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i32*> %i32_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x i32> @zext_signed_scaled_i16_i8(i16* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: zext_signed_scaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.sext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_signed_scaled_i16_i8(i16* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: sext_signed_scaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.sext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_scaled_i16_i8(i16* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: zext_unsigned_scaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.zext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_scaled_i16_i8(i16* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: sext_unsigned_scaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, i16* %base, <4 x i32> %offs.zext
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep(i32* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: scaled_i32_i32_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vmov.i32 q0, #0x14
; CHECK-NEXT:    vshl.i32 q1, q1, #2
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, q0
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs
  %ptrs2 = getelementptr inbounds i32, <4 x i32*> %ptrs, i32 5
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep2(i32* %base) {
; CHECK-LABEL: scaled_i32_i32_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI25_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI25_0:
; CHECK-NEXT:    .long 5 @ 0x5
; CHECK-NEXT:    .long 8 @ 0x8
; CHECK-NEXT:    .long 11 @ 0xb
; CHECK-NEXT:    .long 14 @ 0xe
entry:
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %ptrs2 = getelementptr inbounds i32, <4 x i32*> %ptrs, i32 5
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_i32_i32_2gep_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vmov.i32 q0, #0x14
; CHECK-NEXT:    vshl.i32 q1, q1, #2
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, q0
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs
  %ptrs2 = getelementptr inbounds i32, <4 x ptr> %ptrs, i32 5
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep2_opaque(ptr %base) {
; CHECK-LABEL: scaled_i32_i32_2gep2_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI27_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI27_0:
; CHECK-NEXT:    .long 5 @ 0x5
; CHECK-NEXT:    .long 8 @ 0x8
; CHECK-NEXT:    .long 11 @ 0xb
; CHECK-NEXT:    .long 14 @ 0xe
entry:
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %ptrs2 = getelementptr inbounds i32, <4 x ptr> %ptrs, i32 5
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs2, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)