// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null
#include <arm_sve.h>

// SVE_ACLE_FUNC builds the name of the intrinsic under test by token pasting.
// When SVE_OVERLOADED_FORMS is defined (see the -D option on the lit command
// lines at the top of this file) the type-suffix pieces are dropped so the
// overloaded spelling is exercised; otherwise all four pieces are pasted to
// form the fully explicit name.
#ifdef SVE_OVERLOADED_FORMS
// A simple used,unused... macro, long enough to represent any SVE builtin.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// Stores an svint8x4_t tuple with svst4. Expected IR: the four sub-vectors are
// extracted with tuple.get (indices 0-3) and handed to a single
// st4.nxv16i8 call on base; pg is passed to the intrinsic as-is.
void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data)
{
  // CHECK-LABEL: test_svst4_s8
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_s8, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_s8,,)(pg, base, data);
}

// Stores an svint16x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 8 x i1> through
// convert.from.svbool, then a single st4.nxv8i16 call on base.
void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data)
{
  // CHECK-LABEL: test_svst4_s16
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_s16, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_s16,,)(pg, base, data);
}

// Stores an svint32x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 4 x i1> through
// convert.from.svbool, then a single st4.nxv4i32 call on base.
void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
{
  // CHECK-LABEL: test_svst4_s32
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_s32, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_s32,,)(pg, base, data);
}

// Stores an svint64x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 2 x i1> through
// convert.from.svbool, then a single st4.nxv2i64 call on base.
void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
{
  // CHECK-LABEL: test_svst4_s64
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_s64, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_s64,,)(pg, base, data);
}

// Stores an svuint8x4_t tuple with svst4. Expected IR: the four sub-vectors
// are extracted with tuple.get (indices 0-3) and handed to a single
// st4.nxv16i8 call on base; pg is passed to the intrinsic as-is.
void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
{
  // CHECK-LABEL: test_svst4_u8
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_u8, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_u8,,)(pg, base, data);
}

// Stores an svuint16x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 8 x i1> through
// convert.from.svbool, then a single st4.nxv8i16 call on base.
void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
{
  // CHECK-LABEL: test_svst4_u16
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_u16, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_u16,,)(pg, base, data);
}

// Stores an svuint32x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 4 x i1> through
// convert.from.svbool, then a single st4.nxv4i32 call on base.
void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data)
{
  // CHECK-LABEL: test_svst4_u32
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_u32, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_u32,,)(pg, base, data);
}

// Stores an svuint64x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 2 x i1> through
// convert.from.svbool, then a single st4.nxv2i64 call on base.
void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data)
{
  // CHECK-LABEL: test_svst4_u64
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_u64, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_u64,,)(pg, base, data);
}

// Stores an svfloat16x4_t tuple with svst4. Expected IR: pg converted to
// <vscale x 8 x i1> through convert.from.svbool, four tuple.get extractions
// (indices 0-3), then a single st4.nxv8f16 call on base.
void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data)
{
  // CHECK-LABEL: test_svst4_f16
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 3)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x half> %[[V2]], <vscale x 8 x half> %[[V3]], <vscale x 8 x i1> %[[PG]], half* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_f16, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_f16,,)(pg, base, data);
}

// Stores an svfloat32x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 4 x i1> through
// convert.from.svbool, then a single st4.nxv4f32 call on base.
void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data)
{
  // CHECK-LABEL: test_svst4_f32
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x float> %[[V2]], <vscale x 4 x float> %[[V3]], <vscale x 4 x i1> %[[PG]], float* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_f32, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_f32,,)(pg, base, data);
}

// Stores an svfloat64x4_t tuple with svst4. Expected IR: four tuple.get
// extractions (indices 0-3), pg converted to <vscale x 2 x i1> through
// convert.from.svbool, then a single st4.nxv2f64 call on base.
void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data)
{
  // CHECK-LABEL: test_svst4_f64
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x double> %[[V2]], <vscale x 2 x double> %[[V3]], <vscale x 2 x i1> %[[PG]], double* %base)
  // CHECK-NEXT: ret
  // Resolves to svst4_f64, or to the overloaded svst4 under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data);
}

// Stores an svint8x4_t tuple with svst4_vnum. Expected IR: base is bitcast to
// a <vscale x 16 x i8> pointer and offset by vnum via getelementptr, the four
// sub-vectors come from tuple.get (indices 0-3), and one st4.nxv16i8 call
// writes to the computed address; pg is passed to the intrinsic as-is.
void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_s8
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_s8, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_s8,,)(pg, base, vnum, data);
}

// Stores an svint16x4_t tuple with svst4_vnum. Expected IR: base is bitcast to
// a <vscale x 8 x i16> pointer and offset by vnum via getelementptr, the four
// sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 8 x i1>, and one st4.nxv8i16 call writes to the computed address.
void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_s16
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_s16, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_s16,,)(pg, base, vnum, data);
}

// Stores an svint32x4_t tuple with svst4_vnum. Expected IR: base is bitcast to
// a <vscale x 4 x i32> pointer and offset by vnum via getelementptr, the four
// sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 4 x i1>, and one st4.nxv4i32 call writes to the computed address.
void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_s32
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_s32, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_s32,,)(pg, base, vnum, data);
}

// Stores an svint64x4_t tuple with svst4_vnum. Expected IR: base is bitcast to
// a <vscale x 2 x i64> pointer and offset by vnum via getelementptr, the four
// sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 2 x i1>, and one st4.nxv2i64 call writes to the computed address.
void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_s64
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_s64, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_s64,,)(pg, base, vnum, data);
}

// Stores an svuint8x4_t tuple with svst4_vnum. Expected IR: base is bitcast to
// a <vscale x 16 x i8> pointer and offset by vnum via getelementptr, the four
// sub-vectors come from tuple.get (indices 0-3), and one st4.nxv16i8 call
// writes to the computed address; pg is passed to the intrinsic as-is.
void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_u8
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_u8, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_u8,,)(pg, base, vnum, data);
}

// Stores an svuint16x4_t tuple with svst4_vnum. Expected IR: base is bitcast
// to a <vscale x 8 x i16> pointer and offset by vnum via getelementptr, the
// four sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 8 x i1>, and one st4.nxv8i16 call writes to the computed address.
void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_u16
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_u16, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_u16,,)(pg, base, vnum, data);
}

// Stores an svuint32x4_t tuple with svst4_vnum. Expected IR: base is bitcast
// to a <vscale x 4 x i32> pointer and offset by vnum via getelementptr, the
// four sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 4 x i1>, and one st4.nxv4i32 call writes to the computed address.
void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_u32
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_u32, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_u32,,)(pg, base, vnum, data);
}

// Stores an svuint64x4_t tuple with svst4_vnum. Expected IR: base is bitcast
// to a <vscale x 2 x i64> pointer and offset by vnum via getelementptr, the
// four sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 2 x i1>, and one st4.nxv2i64 call writes to the computed address.
void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_u64
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_u64, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_u64,,)(pg, base, vnum, data);
}

// Stores an svfloat16x4_t tuple with svst4_vnum. Expected IR: base is bitcast
// to a <vscale x 8 x half> pointer and offset by vnum via getelementptr, the
// four sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 8 x i1>, and one st4.nxv8f16 call writes to the computed address.
void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_f16
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x half> %[[V2]], <vscale x 8 x half> %[[V3]], <vscale x 8 x i1> %[[PG]], half* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_f16, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_f16,,)(pg, base, vnum, data);
}

// Stores an svfloat32x4_t tuple with svst4_vnum. Expected IR: base is bitcast
// to a <vscale x 4 x float> pointer and offset by vnum via getelementptr, the
// four sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 4 x i1>, and one st4.nxv4f32 call writes to the computed address.
void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_f32
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x float> %[[V2]], <vscale x 4 x float> %[[V3]], <vscale x 4 x i1> %[[PG]], float* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_f32, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_f32,,)(pg, base, vnum, data);
}

// Stores an svfloat64x4_t tuple with svst4_vnum. Expected IR: base is bitcast
// to a <vscale x 2 x double> pointer and offset by vnum via getelementptr, the
// four sub-vectors come from tuple.get (indices 0-3), pg is converted to
// <vscale x 2 x i1>, and one st4.nxv2f64 call writes to the computed address.
void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data)
{
  // CHECK-LABEL: test_svst4_vnum_f64
  // CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
  // CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
  // CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 0)
  // CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 1)
  // CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 2)
  // CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 3)
  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
  // CHECK: call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x double> %[[V2]], <vscale x 2 x double> %[[V3]], <vscale x 2 x i1> %[[PG]], double* %[[GEP]])
  // CHECK-NEXT: ret
  // Resolves to svst4_vnum_f64, or to the overloaded svst4_vnum under SVE_OVERLOADED_FORMS.
  return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data);
}
