// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s
3
4 // CHECK-LABEL: @test1(
5 // CHECK-NEXT: entry:
6 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
7 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
8 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
9 // CHECK-NEXT: ret void
10 //
test1(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)11 void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
12 __vector_quad vq = *((__vector_quad *)vqp);
13 __vector_pair vp = *((__vector_pair *)vpp);
14 __vector_quad res;
15 __builtin_mma_assemble_acc(&res, vc, vc, vc, vc);
16 *((__vector_quad *)resp) = res;
17 }
18
19 // CHECK-LABEL: @test2(
20 // CHECK-NEXT: entry:
21 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
22 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64
23 // CHECK-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]])
24 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
25 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0
26 // CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
27 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1
28 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
29 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
30 // CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
31 // CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2
32 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32
33 // CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
34 // CHECK-NEXT: store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16
35 // CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3
36 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48
37 // CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>*
38 // CHECK-NEXT: store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16
39 // CHECK-NEXT: ret void
40 //
test2(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)41 void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
42 __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
43 }
44
45 // CHECK-LABEL: @test3(
46 // CHECK-NEXT: entry:
47 // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
48 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
49 // CHECK-NEXT: store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
50 // CHECK-NEXT: ret void
51 //
test3(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)52 void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
53 __vector_quad vq = *((__vector_quad *)vqp);
54 __vector_pair vp = *((__vector_pair *)vpp);
55 __vector_pair res;
56 __builtin_mma_assemble_pair(&res, vc, vc);
57 *((__vector_pair *)resp) = res;
58 }
59
60 // CHECK-LABEL: @test4(
61 // CHECK-NEXT: entry:
62 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
63 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
64 // CHECK-NEXT: [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]])
65 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
66 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
67 // CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
68 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
69 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
70 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
71 // CHECK-NEXT: store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
72 // CHECK-NEXT: ret void
73 //
test4(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)74 void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
75 __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
76 }
77
78 // CHECK-LABEL: @test5(
79 // CHECK-NEXT: entry:
80 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
81 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
82 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP1]])
83 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
84 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
85 // CHECK-NEXT: ret void
86 //
test5(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)87 void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
88 __vector_quad vq = *((__vector_quad *)vqp);
89 __vector_pair vp = *((__vector_pair *)vpp);
90 __builtin_mma_xxmtacc(&vq);
91 *((__vector_quad *)resp) = vq;
92 }
93
94 // CHECK-LABEL: @test6(
95 // CHECK-NEXT: entry:
96 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
97 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
98 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP1]])
99 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
100 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
101 // CHECK-NEXT: ret void
102 //
test6(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)103 void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
104 __vector_quad vq = *((__vector_quad *)vqp);
105 __vector_pair vp = *((__vector_pair *)vpp);
106 __builtin_mma_xxmfacc(&vq);
107 *((__vector_quad *)resp) = vq;
108 }
109
110 // CHECK-LABEL: @test7(
111 // CHECK-NEXT: entry:
112 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
113 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
114 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
115 // CHECK-NEXT: ret void
116 //
test7(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)117 void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
118 __vector_quad vq = *((__vector_quad *)vqp);
119 __vector_pair vp = *((__vector_pair *)vpp);
120 __builtin_mma_xxsetaccz(&vq);
121 *((__vector_quad *)resp) = vq;
122 }
123
124 // CHECK-LABEL: @test8(
125 // CHECK-NEXT: entry:
126 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
127 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
128 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
129 // CHECK-NEXT: ret void
130 //
test8(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)131 void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
132 __vector_quad vq = *((__vector_quad *)vqp);
133 __vector_pair vp = *((__vector_pair *)vpp);
134 __builtin_mma_xvi4ger8(&vq, vc, vc);
135 *((__vector_quad *)resp) = vq;
136 }
137
138 // CHECK-LABEL: @test9(
139 // CHECK-NEXT: entry:
140 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
141 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
142 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
143 // CHECK-NEXT: ret void
144 //
test9(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)145 void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
146 __vector_quad vq = *((__vector_quad *)vqp);
147 __vector_pair vp = *((__vector_pair *)vpp);
148 __builtin_mma_xvi8ger4(&vq, vc, vc);
149 *((__vector_quad *)resp) = vq;
150 }
151
152 // CHECK-LABEL: @test10(
153 // CHECK-NEXT: entry:
154 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
155 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
156 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
157 // CHECK-NEXT: ret void
158 //
test10(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)159 void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
160 __vector_quad vq = *((__vector_quad *)vqp);
161 __vector_pair vp = *((__vector_pair *)vpp);
162 __builtin_mma_xvi16ger2(&vq, vc, vc);
163 *((__vector_quad *)resp) = vq;
164 }
165
166 // CHECK-LABEL: @test11(
167 // CHECK-NEXT: entry:
168 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
169 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
170 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
171 // CHECK-NEXT: ret void
172 //
test11(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)173 void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
174 __vector_quad vq = *((__vector_quad *)vqp);
175 __vector_pair vp = *((__vector_pair *)vpp);
176 __builtin_mma_xvi16ger2s(&vq, vc, vc);
177 *((__vector_quad *)resp) = vq;
178 }
179
180 // CHECK-LABEL: @test12(
181 // CHECK-NEXT: entry:
182 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
183 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
184 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
185 // CHECK-NEXT: ret void
186 //
test12(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)187 void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
188 __vector_quad vq = *((__vector_quad *)vqp);
189 __vector_pair vp = *((__vector_pair *)vpp);
190 __builtin_mma_xvf16ger2(&vq, vc, vc);
191 *((__vector_quad *)resp) = vq;
192 }
193
194 // CHECK-LABEL: @test13(
195 // CHECK-NEXT: entry:
196 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
197 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
198 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
199 // CHECK-NEXT: ret void
200 //
test13(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)201 void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
202 __vector_quad vq = *((__vector_quad *)vqp);
203 __vector_pair vp = *((__vector_pair *)vpp);
204 __builtin_mma_xvf32ger(&vq, vc, vc);
205 *((__vector_quad *)resp) = vq;
206 }
207
208 // CHECK-LABEL: @test14(
209 // CHECK-NEXT: entry:
210 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
211 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6
212 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
213 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
214 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
215 // CHECK-NEXT: ret void
216 //
test14(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)217 void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
218 __vector_quad vq = *((__vector_quad *)vqp);
219 __vector_pair vp = *((__vector_pair *)vpp);
220 __builtin_mma_xvf64ger(&vq, vp, vc);
221 *((__vector_quad *)resp) = vq;
222 }
223
224 // CHECK-LABEL: @test15(
225 // CHECK-NEXT: entry:
226 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
227 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
228 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
229 // CHECK-NEXT: ret void
230 //
test15(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)231 void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
232 __vector_quad vq = *((__vector_quad *)vqp);
233 __vector_pair vp = *((__vector_pair *)vpp);
234 __builtin_mma_pmxvi4ger8(&vq, vc, vc, 0, 0, 0);
235 *((__vector_quad *)resp) = vq;
236 }
237
238 // CHECK-LABEL: @test16(
239 // CHECK-NEXT: entry:
240 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
241 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
242 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
243 // CHECK-NEXT: ret void
244 //
test16(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)245 void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
246 __vector_quad vq = *((__vector_quad *)vqp);
247 __vector_pair vp = *((__vector_pair *)vpp);
248 __builtin_mma_pmxvi8ger4(&vq, vc, vc, 0, 0, 0);
249 *((__vector_quad *)resp) = vq;
250 }
251
252 // CHECK-LABEL: @test17(
253 // CHECK-NEXT: entry:
254 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
255 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
256 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
257 // CHECK-NEXT: ret void
258 //
test17(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)259 void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
260 __vector_quad vq = *((__vector_quad *)vqp);
261 __vector_pair vp = *((__vector_pair *)vpp);
262 __builtin_mma_pmxvi16ger2(&vq, vc, vc, 0, 0, 0);
263 *((__vector_quad *)resp) = vq;
264 }
265
266 // CHECK-LABEL: @test18(
267 // CHECK-NEXT: entry:
268 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
269 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
270 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
271 // CHECK-NEXT: ret void
272 //
test18(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)273 void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
274 __vector_quad vq = *((__vector_quad *)vqp);
275 __vector_pair vp = *((__vector_pair *)vpp);
276 __builtin_mma_pmxvi16ger2s(&vq, vc, vc, 0, 0, 0);
277 *((__vector_quad *)resp) = vq;
278 }
279
280 // CHECK-LABEL: @test19(
281 // CHECK-NEXT: entry:
282 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
283 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
284 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
285 // CHECK-NEXT: ret void
286 //
test19(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)287 void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
288 __vector_quad vq = *((__vector_quad *)vqp);
289 __vector_pair vp = *((__vector_pair *)vpp);
290 __builtin_mma_pmxvf16ger2(&vq, vc, vc, 0, 0, 0);
291 *((__vector_quad *)resp) = vq;
292 }
293
294 // CHECK-LABEL: @test20(
295 // CHECK-NEXT: entry:
296 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
297 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
298 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
299 // CHECK-NEXT: ret void
300 //
test20(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)301 void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
302 __vector_quad vq = *((__vector_quad *)vqp);
303 __vector_pair vp = *((__vector_pair *)vpp);
304 __builtin_mma_pmxvf32ger(&vq, vc, vc, 0, 0);
305 *((__vector_quad *)resp) = vq;
306 }
307
308 // CHECK-LABEL: @test21(
309 // CHECK-NEXT: entry:
310 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
311 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6
312 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
313 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
314 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
315 // CHECK-NEXT: ret void
316 //
test21(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)317 void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
318 __vector_quad vq = *((__vector_quad *)vqp);
319 __vector_pair vp = *((__vector_pair *)vpp);
320 __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0);
321 *((__vector_quad *)resp) = vq;
322 }
323
324 // CHECK-LABEL: @test22(
325 // CHECK-NEXT: entry:
326 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
327 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
328 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
329 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
330 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
331 // CHECK-NEXT: ret void
332 //
test22(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)333 void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
334 __vector_quad vq = *((__vector_quad *)vqp);
335 __vector_pair vp = *((__vector_pair *)vpp);
336 __builtin_mma_xvi4ger8pp(&vq, vc, vc);
337 *((__vector_quad *)resp) = vq;
338 }
339
340 // CHECK-LABEL: @test23(
341 // CHECK-NEXT: entry:
342 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
343 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
344 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
345 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
346 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
347 // CHECK-NEXT: ret void
348 //
test23(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)349 void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
350 __vector_quad vq = *((__vector_quad *)vqp);
351 __vector_pair vp = *((__vector_pair *)vpp);
352 __builtin_mma_xvi8ger4pp(&vq, vc, vc);
353 *((__vector_quad *)resp) = vq;
354 }
355
356 // CHECK-LABEL: @test24(
357 // CHECK-NEXT: entry:
358 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
359 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
360 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
361 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
362 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
363 // CHECK-NEXT: ret void
364 //
test24(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)365 void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
366 __vector_quad vq = *((__vector_quad *)vqp);
367 __vector_pair vp = *((__vector_pair *)vpp);
368 __builtin_mma_xvi8ger4spp(&vq, vc, vc);
369 *((__vector_quad *)resp) = vq;
370 }
371
372 // CHECK-LABEL: @test25(
373 // CHECK-NEXT: entry:
374 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
375 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
376 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
377 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
378 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
379 // CHECK-NEXT: ret void
380 //
test25(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)381 void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
382 __vector_quad vq = *((__vector_quad *)vqp);
383 __vector_pair vp = *((__vector_pair *)vpp);
384 __builtin_mma_xvi16ger2pp(&vq, vc, vc);
385 *((__vector_quad *)resp) = vq;
386 }
387
388 // CHECK-LABEL: @test26(
389 // CHECK-NEXT: entry:
390 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
391 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
392 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
393 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
394 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
395 // CHECK-NEXT: ret void
396 //
test26(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)397 void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
398 __vector_quad vq = *((__vector_quad *)vqp);
399 __vector_pair vp = *((__vector_pair *)vpp);
400 __builtin_mma_xvi16ger2spp(&vq, vc, vc);
401 *((__vector_quad *)resp) = vq;
402 }
403
404 // CHECK-LABEL: @test27(
405 // CHECK-NEXT: entry:
406 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
407 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
408 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
409 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
410 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
411 // CHECK-NEXT: ret void
412 //
test27(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)413 void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
414 __vector_quad vq = *((__vector_quad *)vqp);
415 __vector_pair vp = *((__vector_pair *)vpp);
416 __builtin_mma_pmxvi4ger8pp(&vq, vc, vc, 0, 0, 0);
417 *((__vector_quad *)resp) = vq;
418 }
419
420 // CHECK-LABEL: @test28(
421 // CHECK-NEXT: entry:
422 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
423 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
424 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
425 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
426 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
427 // CHECK-NEXT: ret void
428 //
test28(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)429 void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
430 __vector_quad vq = *((__vector_quad *)vqp);
431 __vector_pair vp = *((__vector_pair *)vpp);
432 __builtin_mma_pmxvi8ger4pp(&vq, vc, vc, 0, 0, 0);
433 *((__vector_quad *)resp) = vq;
434 }
435
436 // CHECK-LABEL: @test29(
437 // CHECK-NEXT: entry:
438 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
439 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
440 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
441 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
442 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
443 // CHECK-NEXT: ret void
444 //
test29(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)445 void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
446 __vector_quad vq = *((__vector_quad *)vqp);
447 __vector_pair vp = *((__vector_pair *)vpp);
448 __builtin_mma_pmxvi8ger4spp(&vq, vc, vc, 0, 0, 0);
449 *((__vector_quad *)resp) = vq;
450 }
451
452 // CHECK-LABEL: @test30(
453 // CHECK-NEXT: entry:
454 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
455 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
456 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
457 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
458 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
459 // CHECK-NEXT: ret void
460 //
test30(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)461 void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
462 __vector_quad vq = *((__vector_quad *)vqp);
463 __vector_pair vp = *((__vector_pair *)vpp);
464 __builtin_mma_pmxvi16ger2pp(&vq, vc, vc, 0, 0, 0);
465 *((__vector_quad *)resp) = vq;
466 }
467
468 // CHECK-LABEL: @test31(
469 // CHECK-NEXT: entry:
470 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
471 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
472 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
473 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
474 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
475 // CHECK-NEXT: ret void
476 //
test31(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)477 void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
478 __vector_quad vq = *((__vector_quad *)vqp);
479 __vector_pair vp = *((__vector_pair *)vpp);
480 __builtin_mma_pmxvi16ger2spp(&vq, vc, vc, 0, 0, 0);
481 *((__vector_quad *)resp) = vq;
482 }
483
484 // CHECK-LABEL: @test32(
485 // CHECK-NEXT: entry:
486 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
487 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
488 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
489 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
490 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
491 // CHECK-NEXT: ret void
492 //
test32(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)493 void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
494 __vector_quad vq = *((__vector_quad *)vqp);
495 __vector_pair vp = *((__vector_pair *)vpp);
496 __builtin_mma_xvf16ger2pp(&vq, vc, vc);
497 *((__vector_quad *)resp) = vq;
498 }
499
500 // CHECK-LABEL: @test33(
501 // CHECK-NEXT: entry:
502 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
503 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
504 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
505 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
506 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
507 // CHECK-NEXT: ret void
508 //
test33(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)509 void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
510 __vector_quad vq = *((__vector_quad *)vqp);
511 __vector_pair vp = *((__vector_pair *)vpp);
512 __builtin_mma_xvf16ger2pn(&vq, vc, vc);
513 *((__vector_quad *)resp) = vq;
514 }
515
516 // CHECK-LABEL: @test34(
517 // CHECK-NEXT: entry:
518 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
519 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
520 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
521 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
522 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
523 // CHECK-NEXT: ret void
524 //
test34(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)525 void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
526 __vector_quad vq = *((__vector_quad *)vqp);
527 __vector_pair vp = *((__vector_pair *)vpp);
528 __builtin_mma_xvf16ger2np(&vq, vc, vc);
529 *((__vector_quad *)resp) = vq;
530 }
531
532 // CHECK-LABEL: @test35(
533 // CHECK-NEXT: entry:
534 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
535 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
536 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
537 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
538 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
539 // CHECK-NEXT: ret void
540 //
test35(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)541 void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
542 __vector_quad vq = *((__vector_quad *)vqp);
543 __vector_pair vp = *((__vector_pair *)vpp);
544 __builtin_mma_xvf16ger2nn(&vq, vc, vc);
545 *((__vector_quad *)resp) = vq;
546 }
547
548 // CHECK-LABEL: @test36(
549 // CHECK-NEXT: entry:
550 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
551 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
552 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
553 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
554 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
555 // CHECK-NEXT: ret void
556 //
// Verifies the prefixed/masked form __builtin_mma_pmxvf16ger2pp lowers to
// llvm.ppc.mma.pmxvf16ger2pp with its three immediate operands (all zero here)
// passed through as i32 constants. 'vp' is unused here.
void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
563
564 // CHECK-LABEL: @test37(
565 // CHECK-NEXT: entry:
566 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
567 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
568 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
569 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
570 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
571 // CHECK-NEXT: ret void
572 //
// Verifies __builtin_mma_pmxvf16ger2pn lowers to llvm.ppc.mma.pmxvf16ger2pn
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2pn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
579
580 // CHECK-LABEL: @test38(
581 // CHECK-NEXT: entry:
582 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
583 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
584 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
585 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
586 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
587 // CHECK-NEXT: ret void
588 //
// Verifies __builtin_mma_pmxvf16ger2np lowers to llvm.ppc.mma.pmxvf16ger2np
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2np(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
595
596 // CHECK-LABEL: @test39(
597 // CHECK-NEXT: entry:
598 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
599 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
600 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
601 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
602 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
603 // CHECK-NEXT: ret void
604 //
// Verifies __builtin_mma_pmxvf16ger2nn lowers to llvm.ppc.mma.pmxvf16ger2nn
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2nn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
611
612 // CHECK-LABEL: @test40(
613 // CHECK-NEXT: entry:
614 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
615 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
616 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
617 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
618 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
619 // CHECK-NEXT: ret void
620 //
// Verifies __builtin_mma_xvf32gerpp lowers to llvm.ppc.mma.xvf32gerpp,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gerpp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
627
628 // CHECK-LABEL: @test41(
629 // CHECK-NEXT: entry:
630 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
631 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
632 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
633 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
634 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
635 // CHECK-NEXT: ret void
636 //
// Verifies __builtin_mma_xvf32gerpn lowers to llvm.ppc.mma.xvf32gerpn,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gerpn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
643
644 // CHECK-LABEL: @test42(
645 // CHECK-NEXT: entry:
646 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
647 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
648 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
649 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
650 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
651 // CHECK-NEXT: ret void
652 //
// Verifies __builtin_mma_xvf32gernp lowers to llvm.ppc.mma.xvf32gernp,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gernp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
659
660 // CHECK-LABEL: @test43(
661 // CHECK-NEXT: entry:
662 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
663 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
664 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
665 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
666 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
667 // CHECK-NEXT: ret void
668 //
// Verifies __builtin_mma_xvf32gernn lowers to llvm.ppc.mma.xvf32gernn,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gernn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
675
676 // CHECK-LABEL: @test44(
677 // CHECK-NEXT: entry:
678 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
679 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
680 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
681 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
682 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
683 // CHECK-NEXT: ret void
684 //
// Verifies __builtin_mma_pmxvf32gerpp lowers to llvm.ppc.mma.pmxvf32gerpp
// with its two immediate operands forwarded as i32 zeros. 'vp' is unused here.
void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gerpp(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
691
692 // CHECK-LABEL: @test45(
693 // CHECK-NEXT: entry:
694 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
695 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
696 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
697 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
698 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
699 // CHECK-NEXT: ret void
700 //
// Verifies __builtin_mma_pmxvf32gerpn lowers to llvm.ppc.mma.pmxvf32gerpn
// with its two immediate operands forwarded as i32 zeros. 'vp' is unused here.
void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gerpn(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
707
708 // CHECK-LABEL: @test46(
709 // CHECK-NEXT: entry:
710 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
711 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
712 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
713 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
714 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
715 // CHECK-NEXT: ret void
716 //
// Verifies __builtin_mma_pmxvf32gernp lowers to llvm.ppc.mma.pmxvf32gernp
// with its two immediate operands forwarded as i32 zeros. 'vp' is unused here.
void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gernp(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
723
724 // CHECK-LABEL: @test47(
725 // CHECK-NEXT: entry:
726 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
727 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
728 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
729 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
730 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
731 // CHECK-NEXT: ret void
732 //
// Verifies __builtin_mma_pmxvf32gernn lowers to llvm.ppc.mma.pmxvf32gernn
// with its two immediate operands forwarded as i32 zeros. 'vp' is unused here.
void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gernn(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
739
740 // CHECK-LABEL: @test48(
741 // CHECK-NEXT: entry:
742 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
743 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
744 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
745 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
746 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
747 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
748 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
749 // CHECK-NEXT: ret void
750 //
// Verifies __builtin_mma_xvf64gerpp lowers to llvm.ppc.mma.xvf64gerpp.
// Unlike the f16/f32 tests, this one actually consumes the 256-bit pair
// operand 'vp' (loaded as <256 x i1>) alongside the accumulator.
void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gerpp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}
757
758 // CHECK-LABEL: @test49(
759 // CHECK-NEXT: entry:
760 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
761 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
762 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
763 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
764 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
765 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
766 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
767 // CHECK-NEXT: ret void
768 //
// Verifies __builtin_mma_xvf64gerpn lowers to llvm.ppc.mma.xvf64gerpn,
// consuming the vector-pair operand 'vp' and the accumulator.
void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gerpn(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}
775
776 // CHECK-LABEL: @test50(
777 // CHECK-NEXT: entry:
778 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
779 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
780 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
781 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
782 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
783 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
784 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
785 // CHECK-NEXT: ret void
786 //
// Verifies __builtin_mma_xvf64gernp lowers to llvm.ppc.mma.xvf64gernp,
// consuming the vector-pair operand 'vp' and the accumulator.
void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gernp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}
793
794 // CHECK-LABEL: @test51(
795 // CHECK-NEXT: entry:
796 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
797 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
798 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
799 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
800 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
801 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
802 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
803 // CHECK-NEXT: ret void
804 //
// Verifies __builtin_mma_xvf64gernn lowers to llvm.ppc.mma.xvf64gernn,
// consuming the vector-pair operand 'vp' and the accumulator.
void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gernn(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}
811
812 // CHECK-LABEL: @test52(
813 // CHECK-NEXT: entry:
814 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
815 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
816 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
817 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
818 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
819 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
820 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
821 // CHECK-NEXT: ret void
822 //
// Verifies __builtin_mma_pmxvf64gerpp lowers to llvm.ppc.mma.pmxvf64gerpp,
// consuming 'vp' and forwarding its two immediate operands as i32 zeros.
void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gerpp(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
829
830 // CHECK-LABEL: @test53(
831 // CHECK-NEXT: entry:
832 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
833 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
834 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
835 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
836 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
837 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
838 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
839 // CHECK-NEXT: ret void
840 //
// Verifies __builtin_mma_pmxvf64gerpn lowers to llvm.ppc.mma.pmxvf64gerpn,
// consuming 'vp' and forwarding its two immediate operands as i32 zeros.
void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gerpn(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
847
848 // CHECK-LABEL: @test54(
849 // CHECK-NEXT: entry:
850 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
851 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
852 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
853 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
854 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
855 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
856 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
857 // CHECK-NEXT: ret void
858 //
// Verifies __builtin_mma_pmxvf64gernp lowers to llvm.ppc.mma.pmxvf64gernp,
// consuming 'vp' and forwarding its two immediate operands as i32 zeros.
void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gernp(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
865
866 // CHECK-LABEL: @test55(
867 // CHECK-NEXT: entry:
868 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
869 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
870 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
871 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
872 // CHECK-NEXT: [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
873 // CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
874 // CHECK-NEXT: store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
875 // CHECK-NEXT: ret void
876 //
// Verifies __builtin_mma_pmxvf64gernn lowers to llvm.ppc.mma.pmxvf64gernn,
// consuming 'vp' and forwarding its two immediate operands as i32 zeros.
void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}
883
884 // CHECK-LABEL: @test56(
885 // CHECK-NEXT: entry:
886 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
887 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
888 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
889 // CHECK-NEXT: ret void
890 //
// Verifies __builtin_mma_xvbf16ger2 lowers to llvm.ppc.mma.xvbf16ger2.
// This form writes the accumulator without reading it, so the initial load
// into 'vq' is dead and optimized away at -O3 (no load appears in the IR).
// 'vp' is likewise unused here.
void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
897
898 // CHECK-LABEL: @test57(
899 // CHECK-NEXT: entry:
900 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
901 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
902 // CHECK-NEXT: store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
903 // CHECK-NEXT: ret void
904 //
// Verifies __builtin_mma_pmxvbf16ger2 lowers to llvm.ppc.mma.pmxvbf16ger2
// with three i32 zero immediates. Like test56, the builtin overwrites the
// accumulator, so the initial loads of 'vq' and 'vp' are dead at -O3.
void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
911
912 // CHECK-LABEL: @test58(
913 // CHECK-NEXT: entry:
914 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
915 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
916 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
917 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
918 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
919 // CHECK-NEXT: ret void
920 //
// Verifies __builtin_mma_xvbf16ger2pp lowers to llvm.ppc.mma.xvbf16ger2pp,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
927
928 // CHECK-LABEL: @test59(
929 // CHECK-NEXT: entry:
930 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
931 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
932 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
933 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
934 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
935 // CHECK-NEXT: ret void
936 //
// Verifies __builtin_mma_xvbf16ger2pn lowers to llvm.ppc.mma.xvbf16ger2pn,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2pn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
943
944 // CHECK-LABEL: @test60(
945 // CHECK-NEXT: entry:
946 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
947 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
948 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
949 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
950 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
951 // CHECK-NEXT: ret void
952 //
// Verifies __builtin_mma_xvbf16ger2np lowers to llvm.ppc.mma.xvbf16ger2np,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2np(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
959
960 // CHECK-LABEL: @test61(
961 // CHECK-NEXT: entry:
962 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
963 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
964 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
965 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
966 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
967 // CHECK-NEXT: ret void
968 //
// Verifies __builtin_mma_xvbf16ger2nn lowers to llvm.ppc.mma.xvbf16ger2nn,
// reading and updating the accumulator loaded from vqp. 'vp' is unused here.
void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2nn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}
975
976 // CHECK-LABEL: @test62(
977 // CHECK-NEXT: entry:
978 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
979 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
980 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
981 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
982 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
983 // CHECK-NEXT: ret void
984 //
// Verifies __builtin_mma_pmxvbf16ger2pp lowers to llvm.ppc.mma.pmxvbf16ger2pp
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
991
992 // CHECK-LABEL: @test63(
993 // CHECK-NEXT: entry:
994 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
995 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
996 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
997 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
998 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
999 // CHECK-NEXT: ret void
1000 //
// Verifies __builtin_mma_pmxvbf16ger2pn lowers to llvm.ppc.mma.pmxvbf16ger2pn
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2pn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
1007
1008 // CHECK-LABEL: @test64(
1009 // CHECK-NEXT: entry:
1010 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1011 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
1012 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
1013 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1014 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
1015 // CHECK-NEXT: ret void
1016 //
// Verifies __builtin_mma_pmxvbf16ger2np lowers to llvm.ppc.mma.pmxvbf16ger2np
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2np(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
1023
1024 // CHECK-LABEL: @test65(
1025 // CHECK-NEXT: entry:
1026 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1027 // CHECK-NEXT: [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
1028 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
1029 // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1030 // CHECK-NEXT: store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
1031 // CHECK-NEXT: ret void
1032 //
// Verifies __builtin_mma_pmxvbf16ger2nn lowers to llvm.ppc.mma.pmxvbf16ger2nn
// with its three immediates forwarded as i32 zeros. 'vp' is unused here.
void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
1039