// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu future -emit-llvm %s -o - | FileCheck %s
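// This test exercises the PowerPC MMA accumulator (__vector_quad) and VSX
// vector-pair (__vector_pair) builtins and checks the LLVM intrinsic calls
// they lower to at -O3.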

// CHECK-LABEL: @test1(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT:    ret void
//
void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __vector_quad res;
  __builtin_mma_assemble_acc(&res, vc, vc, vc, vc);
  *((__vector_quad *)resp) = res;
}

// CHECK-LABEL: @test2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64
// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 0
// CHECK-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 1
// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 2
// CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 32
// CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP8]], <16 x i8>* [[TMP10]], align 16
// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP2]], 3
// CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 48
// CHECK-NEXT:    [[TMP13:%.*]] = bitcast i8* [[TMP12]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP11]], <16 x i8>* [[TMP13]], align 16
// CHECK-NEXT:    ret void
//
void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
}

// CHECK-LABEL: @test3(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
// CHECK-NEXT:    store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6:![0-9]+]]
// CHECK-NEXT:    ret void
//
void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __vector_pair res;
  __builtin_vsx_assemble_pair(&res, vc, vc);
  *((__vector_pair *)resp) = res;
}

// CHECK-LABEL: @test4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
// CHECK-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
// CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
// CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
// CHECK-NEXT:    store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
// CHECK-NEXT:    ret void
//
void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
}

// CHECK-LABEL: @test5(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xxmtacc(&vq);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test6(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xxmfacc(&vq);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test7(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xxsetaccz(&vq);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi4ger8(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test9(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi8ger4(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test10(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test11(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2s(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test12(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test13(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32ger(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test14(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64ger(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test15(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi4ger8(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi8ger4(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test17(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test18(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2s(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test19(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test20(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32ger(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test21(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test22(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi4ger8pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test23(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi8ger4pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test24(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi8ger4spp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test25(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test26(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvi16ger2spp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test27(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi4ger8pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test28(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi8ger4pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test29(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi8ger4spp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test30(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test31(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvi16ger2spp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2pp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test33(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2pn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test34(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2np(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test35(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf16ger2nn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test36(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2pp(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test37(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2pn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test38(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2np(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test39(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf16ger2nn(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test40(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gerpp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test41(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gerpn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test42(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gernp(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test43(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf32gernn(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test44(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gerpp(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test45(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gerpn(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test46(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gernp(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test47(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf32gernn(&vq, vc, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test48(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gerpp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test49(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gerpn(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test50(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gernp(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test51(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvf64gernn(&vq, vp, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test52(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gerpp(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test53(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gerpn(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test54(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gernp(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test55(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
// CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
// CHECK-NEXT:    [[TMP3:%.*]] = load <256 x i1>, <256 x i1>* [[TMP2]], align 32, !tbaa [[TBAA6]]
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test56(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_xvbf16ger2(&vq, vc, vc);
  *((__vector_quad *)resp) = vq;
}

// CHECK-LABEL: @test57(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
// CHECK-NEXT:    store <512 x i1> [[TMP0]], <512 x i1>* [[TMP1]], align 64, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  __vector_pair vp = *((__vector_pair *)vpp);
  __builtin_mma_pmxvbf16ger2(&vq, vc, vc, 0, 0, 0);
  *((__vector_quad *)resp) = vq;
}
911 
912 // CHECK-LABEL: @test58(
913 // CHECK-NEXT:  entry:
914 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
915 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
916 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
917 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
918 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
919 // CHECK-NEXT:    ret void
920 //
test58(unsigned char * vqp,unsigned char * vpp,vector unsigned char vc,unsigned char * resp)921 void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
922   __vector_quad vq = *((__vector_quad *)vqp);
923   __vector_pair vp = *((__vector_pair *)vpp);
924   __builtin_mma_xvbf16ger2pp(&vq, vc, vc);
925   *((__vector_quad *)resp) = vq;
926 }
927 
928 // CHECK-LABEL: @test59(
929 // CHECK-NEXT:  entry:
930 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
931 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
932 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
933 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
934 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
935 // CHECK-NEXT:    ret void
936 //
937 void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
938   __vector_quad vq = *((__vector_quad *)vqp);
939   __vector_pair vp = *((__vector_pair *)vpp);
940   __builtin_mma_xvbf16ger2pn(&vq, vc, vc);
941   *((__vector_quad *)resp) = vq;
942 }
943 
944 // CHECK-LABEL: @test60(
945 // CHECK-NEXT:  entry:
946 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
947 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
948 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
949 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
950 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
951 // CHECK-NEXT:    ret void
952 //
953 void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
954   __vector_quad vq = *((__vector_quad *)vqp);
955   __vector_pair vp = *((__vector_pair *)vpp);
956   __builtin_mma_xvbf16ger2np(&vq, vc, vc);
957   *((__vector_quad *)resp) = vq;
958 }
959 
960 // CHECK-LABEL: @test61(
961 // CHECK-NEXT:  entry:
962 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
963 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
964 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
965 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
966 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
967 // CHECK-NEXT:    ret void
968 //
969 void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
970   __vector_quad vq = *((__vector_quad *)vqp);
971   __vector_pair vp = *((__vector_pair *)vpp);
972   __builtin_mma_xvbf16ger2nn(&vq, vc, vc);
973   *((__vector_quad *)resp) = vq;
974 }
975 
976 // CHECK-LABEL: @test62(
977 // CHECK-NEXT:  entry:
978 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
979 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
980 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
981 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
982 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
983 // CHECK-NEXT:    ret void
984 //
985 void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
986   __vector_quad vq = *((__vector_quad *)vqp);
987   __vector_pair vp = *((__vector_pair *)vpp);
988   __builtin_mma_pmxvbf16ger2pp(&vq, vc, vc, 0, 0, 0);
989   *((__vector_quad *)resp) = vq;
990 }
991 
992 // CHECK-LABEL: @test63(
993 // CHECK-NEXT:  entry:
994 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
995 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
996 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
997 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
998 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
999 // CHECK-NEXT:    ret void
1000 //
1001 void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
1002   __vector_quad vq = *((__vector_quad *)vqp);
1003   __vector_pair vp = *((__vector_pair *)vpp);
1004   __builtin_mma_pmxvbf16ger2pn(&vq, vc, vc, 0, 0, 0);
1005   *((__vector_quad *)resp) = vq;
1006 }
1007 
1008 // CHECK-LABEL: @test64(
1009 // CHECK-NEXT:  entry:
1010 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1011 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1012 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
1013 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1014 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
1015 // CHECK-NEXT:    ret void
1016 //
1017 void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
1018   __vector_quad vq = *((__vector_quad *)vqp);
1019   __vector_pair vp = *((__vector_pair *)vpp);
1020   __builtin_mma_pmxvbf16ger2np(&vq, vc, vc, 0, 0, 0);
1021   *((__vector_quad *)resp) = vq;
1022 }
1023 
1024 // CHECK-LABEL: @test65(
1025 // CHECK-NEXT:  entry:
1026 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1027 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1028 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
1029 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1030 // CHECK-NEXT:    store <512 x i1> [[TMP2]], <512 x i1>* [[TMP3]], align 64, !tbaa [[TBAA2]]
1031 // CHECK-NEXT:    ret void
1032 //
1033 void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
1034   __vector_quad vq = *((__vector_quad *)vqp);
1035   __vector_pair vp = *((__vector_pair *)vpp);
1036   __builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0);
1037   *((__vector_quad *)resp) = vq;
1038 }
1039 
1040 // CHECK-LABEL: @test66(
1041 // CHECK-NEXT:  entry:
1042 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1043 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]])
1044 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1045 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
1046 // CHECK-NEXT:    ret void
1047 //
1048 void test66(const __vector_pair *vpp, const __vector_pair *vp2) {
1049   __vector_pair vp = __builtin_vsx_lxvp(0LL, vpp);
1050   __builtin_vsx_stxvp(vp, 0LL, vp2);
1051 }
1052 
1053 // CHECK-LABEL: @test67(
1054 // CHECK-NEXT:  entry:
1055 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1056 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
1057 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1058 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1059 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
1060 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1061 // CHECK-NEXT:    ret void
1062 //
1063 void test67(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
1064   __vector_pair vp = __builtin_vsx_lxvp(offset, vpp);
1065   __builtin_vsx_stxvp(vp, offset, vp2);
1066 }
1067 
1068 // CHECK-LABEL: @test68(
1069 // CHECK-NEXT:  entry:
1070 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1071 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
1072 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1073 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1074 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
1075 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1076 // CHECK-NEXT:    ret void
1077 //
1078 void test68(const __vector_pair *vpp, const __vector_pair *vp2) {
1079   __vector_pair vp = __builtin_vsx_lxvp(18LL, vpp);
1080   __builtin_vsx_stxvp(vp, 18LL, vp2);
1081 }
1082 
1083 // CHECK-LABEL: @test69(
1084 // CHECK-NEXT:  entry:
1085 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1086 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
1087 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1088 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1089 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
1090 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1091 // CHECK-NEXT:    ret void
1092 //
1093 void test69(const __vector_pair *vpp, const __vector_pair *vp2) {
1094   __vector_pair vp = __builtin_vsx_lxvp(1LL, vpp);
1095   __builtin_vsx_stxvp(vp, 1LL, vp2);
1096 }
1097 
1098 // CHECK-LABEL: @test70(
1099 // CHECK-NEXT:  entry:
1100 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1101 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
1102 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1103 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1104 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
1105 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1106 // CHECK-NEXT:    ret void
1107 //
1108 void test70(const __vector_pair *vpp, const __vector_pair *vp2) {
1109   __vector_pair vp = __builtin_vsx_lxvp(42LL, vpp);
1110   __builtin_vsx_stxvp(vp, 42LL, vp2);
1111 }
1112 
1113 // CHECK-LABEL: @test71(
1114 // CHECK-NEXT:  entry:
1115 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
1116 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
1117 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1118 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
1119 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
1120 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1121 // CHECK-NEXT:    ret void
1122 //
1123 void test71(const __vector_pair *vpp, const __vector_pair *vp2) {
1124   __vector_pair vp = __builtin_vsx_lxvp(32768LL, vpp);
1125   __builtin_vsx_stxvp(vp, 32768LL, vp2);
1126 }
1127 
1128 // CHECK-LABEL: @test72(
1129 // CHECK-NEXT:  entry:
1130 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1131 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
1132 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1133 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1134 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
1135 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1136 // CHECK-NEXT:    ret void
1137 //
1138 void test72(const __vector_pair *vpp, const __vector_pair *vp2) {
1139   __vector_pair vp = __builtin_vsx_lxvp(32799LL, vpp);
1140   __builtin_vsx_stxvp(vp, 32799LL, vp2);
1141 }
1142 
1143 // CHECK-LABEL: @test73(
1144 // CHECK-NEXT:  entry:
1145 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1146 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1147 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1148 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
1149 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
1150 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
1151 // CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1152 // CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
1153 // CHECK-NEXT:    ret void
1154 //
1155 void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1156   __vector_quad vq = *((__vector_quad *)vqp);
1157   __vector_pair vp = __builtin_vsx_lxvp(8LL, vpp);
1158   __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
1159   *((__vector_quad *)resp) = vq;
1160 }
1161 
1162 // CHECK-LABEL: @test74(
1163 // CHECK-NEXT:  entry:
1164 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1165 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1166 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1167 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
1168 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
1169 // CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1170 // CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
1171 // CHECK-NEXT:    ret void
1172 //
1173 void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1174   __vector_quad vq = *((__vector_quad *)vqp);
1175   __vector_pair vp = __builtin_vsx_lxvp(0LL, vpp);
1176   __builtin_mma_xvf64gernp(&vq, vp, vc);
1177   *((__vector_quad *)resp) = vq;
1178 }
1179 
1180 // CHECK-LABEL: @test75(
1181 // CHECK-NEXT:  entry:
1182 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1183 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1184 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1185 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
1186 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
1187 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
1188 // CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1189 // CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
1190 // CHECK-NEXT:    ret void
1191 //
1192 void test75(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1193   __vector_quad vq = *((__vector_quad *)vqp);
1194   __vector_pair vp = __builtin_vsx_lxvp(offs, vpp);
1195   __builtin_mma_xvf64gernp(&vq, vp, vc);
1196   *((__vector_quad *)resp) = vq;
1197 }
1198 
1199 // CHECK-LABEL: @test76(
1200 // CHECK-NEXT:  entry:
1201 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
1202 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[RESP:%.*]] to <256 x i1>*
1203 // CHECK-NEXT:    store <256 x i1> [[TMP0]], <256 x i1>* [[TMP1]], align 32, !tbaa [[TBAA6]]
1204 // CHECK-NEXT:    ret void
1205 //
1206 void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
1207   __vector_quad vq = *((__vector_quad *)vqp);
1208   __vector_pair vp = *((__vector_pair *)vpp);
1209   __vector_pair res;
1210   __builtin_mma_assemble_pair(&res, vc, vc);
1211   *((__vector_pair *)resp) = res;
1212 }
1213 
1214 // CHECK-LABEL: @test77(
1215 // CHECK-NEXT:  entry:
1216 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VPP:%.*]] to <256 x i1>*
1217 // CHECK-NEXT:    [[TMP1:%.*]] = load <256 x i1>, <256 x i1>* [[TMP0]], align 32
1218 // CHECK-NEXT:    [[TMP2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP1]])
1219 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[RESP:%.*]] to <16 x i8>*
1220 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 0
1221 // CHECK-NEXT:    store <16 x i8> [[TMP4]], <16 x i8>* [[TMP3]], align 16
1222 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP2]], 1
1223 // CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[RESP]], i64 16
1224 // CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <16 x i8>*
1225 // CHECK-NEXT:    store <16 x i8> [[TMP5]], <16 x i8>* [[TMP7]], align 16
1226 // CHECK-NEXT:    ret void
1227 //
1228 void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
1229   __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
1230 }
1231 
1232 // CHECK-LABEL: @test78(
1233 // CHECK-NEXT:  entry:
1234 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1235 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP0]])
1236 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1237 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], i8* [[TMP2]])
1238 // CHECK-NEXT:    ret void
1239 //
1240 void test78(const __vector_pair *vpp, const __vector_pair *vp2) {
1241   __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
1242   __builtin_mma_stxvp(vp, 0LL, vp2);
1243 }
1244 
1245 // CHECK-LABEL: @test79(
1246 // CHECK-NEXT:  entry:
1247 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1248 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[OFFSET:%.*]]
1249 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1250 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1251 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[OFFSET]]
1252 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1253 // CHECK-NEXT:    ret void
1254 //
1255 void test79(const __vector_pair *vpp, signed long long offset, const __vector_pair *vp2) {
1256   __vector_pair vp = __builtin_mma_lxvp(offset, vpp);
1257   __builtin_mma_stxvp(vp, offset, vp2);
1258 }
1259 
1260 // CHECK-LABEL: @test80(
1261 // CHECK-NEXT:  entry:
1262 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1263 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 18
1264 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1265 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1266 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 18
1267 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1268 // CHECK-NEXT:    ret void
1269 //
1270 void test80(const __vector_pair *vpp, const __vector_pair *vp2) {
1271   __vector_pair vp = __builtin_mma_lxvp(18LL, vpp);
1272   __builtin_mma_stxvp(vp, 18LL, vp2);
1273 }
1274 
1275 // CHECK-LABEL: @test81(
1276 // CHECK-NEXT:  entry:
1277 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1278 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 1
1279 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1280 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1281 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 1
1282 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1283 // CHECK-NEXT:    ret void
1284 //
1285 void test81(const __vector_pair *vpp, const __vector_pair *vp2) {
1286   __vector_pair vp = __builtin_mma_lxvp(1LL, vpp);
1287   __builtin_mma_stxvp(vp, 1LL, vp2);
1288 }
1289 
1290 // CHECK-LABEL: @test82(
1291 // CHECK-NEXT:  entry:
1292 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1293 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 42
1294 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1295 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1296 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 42
1297 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1298 // CHECK-NEXT:    ret void
1299 //
1300 void test82(const __vector_pair *vpp, const __vector_pair *vp2) {
1301   __vector_pair vp = __builtin_mma_lxvp(42LL, vpp);
1302   __builtin_mma_stxvp(vp, 42LL, vp2);
1303 }
1304 
1305 // CHECK-LABEL: @test83(
1306 // CHECK-NEXT:  entry:
1307 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VPP:%.*]], i64 128
1308 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <256 x i1>* [[TMP0]] to i8*
1309 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1310 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <256 x i1>, <256 x i1>* [[VP2:%.*]], i64 128
1311 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <256 x i1>* [[TMP3]] to i8*
1312 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1313 // CHECK-NEXT:    ret void
1314 //
1315 void test83(const __vector_pair *vpp, const __vector_pair *vp2) {
1316   __vector_pair vp = __builtin_mma_lxvp(32768LL, vpp);
1317   __builtin_mma_stxvp(vp, 32768LL, vp2);
1318 }
1319 
1320 // CHECK-LABEL: @test84(
1321 // CHECK-NEXT:  entry:
1322 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1323 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 32799
1324 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP1]])
1325 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <256 x i1>* [[VP2:%.*]] to i8*
1326 // CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, i8* [[TMP3]], i64 32799
1327 // CHECK-NEXT:    tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP2]], i8* [[TMP4]])
1328 // CHECK-NEXT:    ret void
1329 //
1330 void test84(const __vector_pair *vpp, const __vector_pair *vp2) {
1331   __vector_pair vp = __builtin_mma_lxvp(32799LL, vpp);
1332   __builtin_mma_stxvp(vp, 32799LL, vp2);
1333 }
1334 
1335 // CHECK-LABEL: @test85(
1336 // CHECK-NEXT:  entry:
1337 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1338 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1339 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1340 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 8
1341 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
1342 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
1343 // CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1344 // CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
1345 // CHECK-NEXT:    ret void
1346 //
1347 void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1348   __vector_quad vq = *((__vector_quad *)vqp);
1349   __vector_pair vp = __builtin_mma_lxvp(8LL, vpp);
1350   __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
1351   *((__vector_quad *)resp) = vq;
1352 }
1353 
1354 // CHECK-LABEL: @test86(
1355 // CHECK-NEXT:  entry:
1356 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1357 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1358 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1359 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP2]])
1360 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP3]], <16 x i8> [[VC:%.*]])
1361 // CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1362 // CHECK-NEXT:    store <512 x i1> [[TMP4]], <512 x i1>* [[TMP5]], align 64, !tbaa [[TBAA2]]
1363 // CHECK-NEXT:    ret void
1364 //
1365 void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1366   __vector_quad vq = *((__vector_quad *)vqp);
1367   __vector_pair vp = __builtin_mma_lxvp(0LL, vpp);
1368   __builtin_mma_xvf64gernp(&vq, vp, vc);
1369   *((__vector_quad *)resp) = vq;
1370 }
1371 
1372 // CHECK-LABEL: @test87(
1373 // CHECK-NEXT:  entry:
1374 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[VQP:%.*]] to <512 x i1>*
1375 // CHECK-NEXT:    [[TMP1:%.*]] = load <512 x i1>, <512 x i1>* [[TMP0]], align 64, !tbaa [[TBAA2]]
1376 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <256 x i1>* [[VPP:%.*]] to i8*
1377 // CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[OFFS:%.*]]
1378 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* [[TMP3]])
1379 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP1]], <256 x i1> [[TMP4]], <16 x i8> [[VC:%.*]])
1380 // CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[RESP:%.*]] to <512 x i1>*
1381 // CHECK-NEXT:    store <512 x i1> [[TMP5]], <512 x i1>* [[TMP6]], align 64, !tbaa [[TBAA2]]
1382 // CHECK-NEXT:    ret void
1383 //
1384 void test87(unsigned char *vqp, signed long long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1385   __vector_quad vq = *((__vector_quad *)vqp);
1386   __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
1387   __builtin_mma_xvf64gernp(&vq, vp, vc);
1388   *((__vector_quad *)resp) = vq;
1389 }
1390
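
// The following function is an illustrative sketch only: it is not covered by
// the autogenerated assertions above, and its name is hypothetical. It shows
// how the lxvp and xvf64gernp builtins exercised individually in the tests
// above would typically be combined, loading successive 32-byte pairs and
// accumulating each update into a single quad-word accumulator.
void sketch_xvf64gernp_loop(unsigned char *vqp, const __vector_pair *vpp, long long n, vector unsigned char vc, unsigned char *resp) {
  __vector_quad vq = *((__vector_quad *)vqp);
  for (long long i = 0; i < n; ++i) {
    // Load the i-th __vector_pair (each pair is 32 bytes).
    __vector_pair vp = __builtin_vsx_lxvp(i * 32LL, vpp);
    // Accumulate the f64 GER update into the accumulator.
    __builtin_mma_xvf64gernp(&vq, vp, vc);
  }
  *((__vector_quad *)resp) = vq;
}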