1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -codegenprepare < %s | FileCheck %s
3
4target triple = "aarch64-unknown-linux-gnu"
5
6%struct.a = type { i32, i32 }
7@c = external dso_local global %struct.a, align 4
8@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16
9
10define <vscale x 4 x i32> @splat_base(i32* %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask) #0 {
11; CHECK-LABEL: @splat_base(
12; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <vscale x 4 x i64> [[INDEX:%.*]]
13; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
14; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
15;
16  %broadcast.splatinsert = insertelement <vscale x 4 x i32*> undef, i32* %base, i32 0
17  %broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
18  %gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, <vscale x 4 x i64> %index
19  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
20  ret <vscale x 4 x i32> %res
21}
22
23define <vscale x 4 x i32> @splat_struct(%struct.a* %base, <vscale x 4 x i1> %mask) #0 {
24; CHECK-LABEL: @splat_struct(
25; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
26; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
27; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
28; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
29;
30  %gep = getelementptr %struct.a, %struct.a* %base, <vscale x 4 x i64> zeroinitializer, i32 1
31  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
32  ret <vscale x 4 x i32> %res
33}
34
35define <vscale x 4 x i32> @scalar_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
36; CHECK-LABEL: @scalar_index(
37; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
38; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
39; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
40; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
41;
42  %broadcast.splatinsert = insertelement <vscale x 4 x i32*> undef, i32* %base, i32 0
43  %broadcast.splat = shufflevector <vscale x 4 x i32*> %broadcast.splatinsert, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
44  %gep = getelementptr i32, <vscale x 4 x i32*> %broadcast.splat, i64 %index
45  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
46  ret <vscale x 4 x i32> %res
47}
48
49define <vscale x 4 x i32> @splat_index(i32* %base, i64 %index, <vscale x 4 x i1> %mask) #0 {
50; CHECK-LABEL: @splat_index(
51; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
52; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <vscale x 4 x i64> zeroinitializer
53; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP2]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
54; CHECK-NEXT:    ret <vscale x 4 x i32> [[RES]]
55;
56  %broadcast.splatinsert = insertelement <vscale x 4 x i64> undef, i64 %index, i32 0
57  %broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
58  %gep = getelementptr i32, i32* %base, <vscale x 4 x i64> %broadcast.splat
59  %res = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %gep, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
60  ret <vscale x 4 x i32> %res
61}
62
63define <vscale x 4 x i32> @test_global_array(<vscale x 4 x i64> %indxs, <vscale x 4 x i1> %mask) #0 {
64; CHECK-LABEL: @test_global_array(
65; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <vscale x 4 x i64> [[INDXS:%.*]]
66; CHECK-NEXT:    [[G:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
67; CHECK-NEXT:    ret <vscale x 4 x i32> [[G]]
68;
69  %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <vscale x 4 x i64> %indxs
70  %g = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
71  ret <vscale x 4 x i32> %g
72}
73
74define <vscale x 4 x i32> @global_struct_splat(<vscale x 4 x i1> %mask) #0 {
75; CHECK-LABEL: @global_struct_splat(
76; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> shufflevector (<vscale x 4 x i32*> insertelement (<vscale x 4 x i32*> undef, i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32 0), <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer), i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> undef)
77; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
78;
79  %1 = insertelement <vscale x 4 x %struct.a*> undef, %struct.a* @c, i32 0
80  %2 = shufflevector <vscale x 4 x %struct.a*> %1, <vscale x 4 x %struct.a*> undef, <vscale x 4 x i32> zeroinitializer
81  %3 = getelementptr %struct.a, <vscale x 4 x %struct.a*> %2, <vscale x 4 x i64> zeroinitializer, i32 1
82  %4 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %3, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
83  ret <vscale x 4 x i32> %4
84}
85
86define <vscale x 4 x i32> @splat_ptr_gather(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru) #0 {
87; CHECK-LABEL: @splat_ptr_gather(
88; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
89; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x i32> [[PASSTHRU:%.*]])
90; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
91;
92  %1 = insertelement <vscale x 4 x i32*> undef, i32* %ptr, i32 0
93  %2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
94  %3 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %passthru)
95  ret <vscale x 4 x i32> %3
96}
97
98define void @splat_ptr_scatter(i32* %ptr, <vscale x 4 x i1> %mask, <vscale x 4 x i32> %val) #0 {
99; CHECK-LABEL: @splat_ptr_scatter(
100; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <vscale x 4 x i64> zeroinitializer
101; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> [[VAL:%.*]], <vscale x 4 x i32*> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
102; CHECK-NEXT:    ret void
103;
104  %1 = insertelement <vscale x 4 x i32*> undef, i32* %ptr, i32 0
105  %2 = shufflevector <vscale x 4 x i32*> %1, <vscale x 4 x i32*> undef, <vscale x 4 x i32> zeroinitializer
106  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %2, i32 4, <vscale x 4 x i1> %mask)
107  ret void
108}
109
110declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
111declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
112
113attributes #0 = { "target-features"="+sve" }
114