// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>
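
// Each test is compiled twice: once calling the explicitly typed intrinsic
// (e.g. vshlcq_s8) and once, with POLYMORPHIC defined, calling the generic
// vshlcq/vshlcq_m form. Both variants are expected to lower to the same
// llvm.arm.mve.vshlc / llvm.arm.mve.vshlc.predicated calls checked below.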

// CHECK-LABEL: @test_vshlcq_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 18)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
//
int8x16_t test_vshlcq_s8(int8x16_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 18);
#else  /* POLYMORPHIC */
  return vshlcq_s8(a, b, 18);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 16)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
//
int16x8_t test_vshlcq_s16(int16x8_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 16);
#else  /* POLYMORPHIC */
  return vshlcq_s16(a, b, 16);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 4)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
int32x4_t test_vshlcq_s32(int32x4_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 4);
#else  /* POLYMORPHIC */
  return vshlcq_s32(a, b, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 17)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vshlcq_u8(uint8x16_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 17);
#else  /* POLYMORPHIC */
  return vshlcq_u8(a, b, 17);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 17)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vshlcq_u16(uint16x8_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 17);
#else  /* POLYMORPHIC */
  return vshlcq_u16(a, b, 17);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 20)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vshlcq_u32(uint32x4_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 20);
#else  /* POLYMORPHIC */
  return vshlcq_u32(a, b, 20);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 29, <16 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP5]]
//
int8x16_t test_vshlcq_m_s8(int8x16_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 29, p);
#else  /* POLYMORPHIC */
  return vshlcq_m_s8(a, b, 29, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 17, <8 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP5]]
//
int16x8_t test_vshlcq_m_s16(int16x8_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 17, p);
#else  /* POLYMORPHIC */
  return vshlcq_m_s16(a, b, 17, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 9, <4 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP5]]
//
int32x4_t test_vshlcq_m_s32(int32x4_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 9, p);
#else  /* POLYMORPHIC */
  return vshlcq_m_s32(a, b, 9, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 21, <16 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vshlcq_m_u8(uint8x16_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 21, p);
#else  /* POLYMORPHIC */
  return vshlcq_m_u8(a, b, 21, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 24, <8 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vshlcq_m_u16(uint16x8_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 24, p);
#else  /* POLYMORPHIC */
  return vshlcq_m_u16(a, b, 24, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 26, <4 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], i32* [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vshlcq_m_u32(uint32x4_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 26, p);
#else  /* POLYMORPHIC */
  return vshlcq_m_u32(a, b, 26, p);
#endif /* POLYMORPHIC */
}