// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s
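// This file checks that the PowerPC MMA builtins are lowered to their
// corresponding llvm.ppc.mma.* intrinsics, with accumulators materialized
// as <512 x i1> values and vector pairs as <256 x i1> values.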

// CHECK-LABEL: @test1(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __vector_quad res;
  __builtin_mma_assemble_acc(&res, vc, vc, vc, vc);
  *((__vector_quad *)resp) = res;
}

// CHECK-LABEL: @test2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64
// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0
// CHECK-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1
// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2
// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32
// CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16
// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3
// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48
// CHECK-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16
// CHECK-NEXT:    ret void
//
void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
}

// CHECK-LABEL: @test3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa !6
// CHECK-NEXT:    ret void
//
void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __vector_pair res;
  __builtin_mma_assemble_pair(&res, vc, vc);
  *((__vector_pair *)resp) = res;
}

// CHECK-LABEL: @test4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
// CHECK-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
// CHECK-NEXT:    ret void
//
void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
}

// CHECK-LABEL: @test5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xxmtacc(&vq);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test6(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xxmfacc(&vq);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test7(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xxsetaccz(&vq);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi4ger8(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test9(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi8ger4(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test10(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test11(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2s(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test12(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test13(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32ger(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test14(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64ger(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test15(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi4ger8(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi8ger4(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test17(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test18(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2s(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test19(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test20(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32ger(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test21(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test22(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi4ger8pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test23(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi8ger4pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test24(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi8ger4spp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test25(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test26(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2spp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test27(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi4ger8pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test28(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi8ger4pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test29(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi8ger4spp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test30(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test31(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2spp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test33(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2pn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test34(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2np(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test35(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2nn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test36(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test37(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2pn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test38(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2np(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test39(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2nn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test40(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gerpp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test41(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gerpn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test42(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gernp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test43(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gernn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test44(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gerpp(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test45(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gerpn(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test46(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gernp(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test47(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gernn(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test48(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gerpp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test49(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gerpn(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test50(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gernp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test51(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gernn(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test52(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gerpp(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test53(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gerpn(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test54(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gernp(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test55(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa !6
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test56(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test57(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test58(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test59(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2pn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test60(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2np(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test61(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2nn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test62(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test63(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2pn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2np(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test65(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa !2
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa !2
// CHECK-NEXT:    ret void
//
void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}