1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -codegenprepare < %s | FileCheck %s 3 4target triple = "aarch64-unknown-linux-gnu" 5 6%struct.a = type { i32, i32 } 7@c = external dso_local global %struct.a, align 4 8@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16 9 10define <vscale x 4 x i32> @splat_base(i32* %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask) #0 { 11; CHECK-LABEL: @splat_base( 12; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <vscale x 4 x i64> [[INDEX:%.*]] 13; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef) 14; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]] 15; 16 %broadcast.splatinsert = insertelement <vscale x 4 x i32*> undef, i32* %base, i32 0 17 %broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer 18 %gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, <vscale x 4 x i64> %index 19 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef) 20 ret <vscale x 4 x i32> %res 21} 22 23define <vscale x 4 x i32> @splat_struct(%struct.a* %base, <vscale x 4 x i1> %mask) #0 { 24; CHECK-LABEL: @splat_struct( 25; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1 26; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer 27; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef) 28; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]] 29; 30 %gep = getelementptr %struct.a, %struct.a* %base, <vscale x 4 x i64> zeroinitializer, i32 1 31 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef) 32 ret <vscale x 4 x i32> %res 33} 34 35define <vscale x 4 x i32> @scalar_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 { 36; CHECK-LABEL: @scalar_index( 37; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]] 38; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer 39; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef) 40; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]] 41; 42 %broadcast.splatinsert = insertelement <vscale x 4 x i32*> undef, i32* %base, i32 0 43 %broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer 44 %gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, i64 %index 45 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef) 46 ret <vscale x 4 x i32> %res 47} 48 49define <vscale x 4 x i32> @splat_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 { 50; CHECK-LABEL: @splat_index( 51; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]] 52; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer 53; CHECK-NEXT: [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef) 54; CHECK-NEXT: ret <vscale x 4 x i32> [[RES]] 55; 56 %broadcast.splatinsert = insertelement <vscale x 4 x i64> undef, i64 %index, i32 0 57 %broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer 58 %gep = getelementptr i32, i32* %base, <vscale x 4 x i64> %broadcast.splat 59 %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef) 60 ret <vscale x 4 x i32> %res 61} 62 63define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale x 4 x i1> %mask) #0 { 64; CHECK-LABEL: @test_global_array( 65; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <vscale x 4 x i64> [[INDXS:%.*]] 66; CHECK-NEXT: [[G:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef) 67; CHECK-NEXT: ret <vscale x 4 x i32> [[G]] 68; 69 %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <vscale x 4 x i64> %indxs 70 %g = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef) 71 ret <vscale x 4 x i32> %g 72} 73 74define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 { 75; CHECK-LABEL: @global_struct_splat( 76; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> shufflevector (<vscale x 4 x i32*> insertelement (<vscale x 4 x i32*> undef, i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32 0), <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef) 77; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] 78; 79 %1 = insertelement <vscale x 4 x %struct.a*> undef, %struct.a* @c, i32 0 80 %2 = shufflevector <vscale x 4 x %struct.a*> %1, <vscale x 4 x %struct.a*> undef, <vscale x 4 x i32> zeroinitializer 81 %3 = getelementptr %struct.a, <vscale x 4 x %struct.a*> %2, <vscale x 4 x i64> zeroinitializer, i32 1 82 %4 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %3, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef) 83 ret <vscale x 4 x i32> %4 84} 85 86define <vscale x 4 x i32> @splat_ptr_gather(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 { 87; CHECK-LABEL: @splat_ptr_gather( 88; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer 89; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]]) 90; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]] 91; 92 %1 = insertelement <vscale x 4 x i32*> undef, i32* %ptr, i32 0 93 %2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer 94 %3 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) 95 ret <vscale x 4 x i32> %3 96} 97 98define void @splat_ptr_scatter(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 { 99; CHECK-LABEL: @splat_ptr_scatter( 100; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer 101; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]]) 102; CHECK-NEXT: ret void 103; 104 %1 = insertelement <vscale x 4 x i32*> undef, i32* %ptr, i32 0 105 %2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer 106 call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask) 107 ret void 108} 109 110declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>) 111declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>) 112 113attributes #0 = { "target-features"="+sve" } 114