; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

; test50: calls llvm.ppc.mma.xvbf16ger2 with %vc as both <16 x i8> operands
; (the intrinsic takes no accumulator input) and stores the <512 x i1> result
; through %resp with 64-byte alignment. %vqp and %vpp are unused.
; The assertions expect the outer-product instruction, an xxmfacc, and four
; stxv stores whose offsets differ between the little- and big-endian runs.
; Function Attrs: nofree nounwind writeonly
define dso_local void @test50(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test50:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvbf16ger2 acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test50:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xvbf16ger2 acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> %vc, <16 x i8> %vc)
  %1 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %0, <512 x i1>* %1, align 64
  ret void
}

; Intrinsic exercised by @test50: two <16 x i8> operands in, one <512 x i1>
; value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8>, <16 x i8>)

; test51: calls llvm.ppc.mma.pmxvbf16ger2 with %vc as both <16 x i8> operands
; and three trailing i32 arguments that are all zero (they show up as the
; `0, 0, 0` operands of the expected instruction). The <512 x i1> result is
; stored through %resp with 64-byte alignment. %vqp and %vpp are unused.
; Function Attrs: nofree nounwind writeonly
define dso_local void @test51(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test51:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pmxvbf16ger2 acc0, v2, v2, 0, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test51:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    pmxvbf16ger2 acc0, v2, v2, 0, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
  %1 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %0, <512 x i1>* %1, align 64
  ret void
}

; Intrinsic exercised by @test51: two <16 x i8> operands plus three i32
; arguments in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32)

; test52: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.xvbf16ger2pp together with %vc as both <16 x i8>
; operands, and stores the <512 x i1> result through %resp (align 64).
; %vpp is unused.
; The assertions expect four lxv loads and an xxmtacc before the instruction,
; then an xxmfacc and four stxv stores after it.
; Function Attrs: nofree nounwind
define dso_local void @test52(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test52:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvbf16ger2pp acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test52:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvbf16ger2pp acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test52: one <512 x i1> operand and two <16 x i8>
; operands in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)

; test53: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.xvbf16ger2pn together with %vc as both <16 x i8>
; operands, and stores the <512 x i1> result through %resp (align 64).
; %vpp is unused.
; The assertions expect four lxv loads and an xxmtacc before the instruction,
; then an xxmfacc and four stxv stores after it.
; Function Attrs: nofree nounwind
define dso_local void @test53(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test53:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvbf16ger2pn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test53:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvbf16ger2pn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test53: one <512 x i1> operand and two <16 x i8>
; operands in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>)

; test54: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.xvbf16ger2np together with %vc as both <16 x i8>
; operands, and stores the <512 x i1> result through %resp (align 64).
; %vpp is unused.
; The assertions expect four lxv loads and an xxmtacc before the instruction,
; then an xxmfacc and four stxv stores after it.
; Function Attrs: nofree nounwind
define dso_local void @test54(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test54:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvbf16ger2np acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test54:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvbf16ger2np acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test54: one <512 x i1> operand and two <16 x i8>
; operands in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>)

; test55: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.xvbf16ger2nn together with %vc as both <16 x i8>
; operands, and stores the <512 x i1> result through %resp (align 64).
; %vpp is unused.
; The assertions expect four lxv loads and an xxmtacc before the instruction,
; then an xxmfacc and four stxv stores after it.
; Function Attrs: nofree nounwind
define dso_local void @test55(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test55:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvbf16ger2nn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test55:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvbf16ger2nn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test55: one <512 x i1> operand and two <16 x i8>
; operands in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>)

; test56: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.pmxvbf16ger2pp together with %vc as both <16 x i8>
; operands and three i32 arguments that are all zero (the `0, 0, 0` operands of
; the expected instruction), and stores the <512 x i1> result through %resp
; (align 64). %vpp is unused.
; Function Attrs: nofree nounwind
define dso_local void @test56(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test56:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvbf16ger2pp acc0, v2, v2, 0, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test56:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    pmxvbf16ger2pp acc0, v2, v2, 0, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test56: one <512 x i1> operand, two <16 x i8>
; operands and three i32 arguments in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)

; test57: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.pmxvbf16ger2pn together with %vc as both <16 x i8>
; operands and three i32 arguments that are all zero (the `0, 0, 0` operands of
; the expected instruction), and stores the <512 x i1> result through %resp
; (align 64). %vpp is unused.
; Function Attrs: nofree nounwind
define dso_local void @test57(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test57:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvbf16ger2pn acc0, v2, v2, 0, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test57:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    pmxvbf16ger2pn acc0, v2, v2, 0, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test57: one <512 x i1> operand, two <16 x i8>
; operands and three i32 arguments in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)

; test58: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.pmxvbf16ger2np together with %vc as both <16 x i8>
; operands and three i32 arguments that are all zero (the `0, 0, 0` operands of
; the expected instruction), and stores the <512 x i1> result through %resp
; (align 64). %vpp is unused.
; Function Attrs: nofree nounwind
define dso_local void @test58(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test58:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvbf16ger2np acc0, v2, v2, 0, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test58:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    pmxvbf16ger2np acc0, v2, v2, 0, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test58: one <512 x i1> operand, two <16 x i8>
; operands and three i32 arguments in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)

; test59: loads a <512 x i1> value from %vqp (align 64), passes it as the first
; operand of llvm.ppc.mma.pmxvbf16ger2nn together with %vc as both <16 x i8>
; operands and three i32 arguments that are all zero (the `0, 0, 0` operands of
; the expected instruction), and stores the <512 x i1> result through %resp
; (align 64). %vpp is unused.
; Function Attrs: nofree nounwind
define dso_local void @test59(i8* nocapture readonly %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test59:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvbf16ger2nn acc0, v2, v2, 0, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test59:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    pmxvbf16ger2nn acc0, v2, v2, 0, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
  %3 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %2, <512 x i1>* %3, align 64
  ret void
}

; Intrinsic exercised by @test59: one <512 x i1> operand, two <16 x i8>
; operands and three i32 arguments in, one <512 x i1> value out.
; Function Attrs: nounwind readnone
declare <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)

; test60: calls llvm.ppc.vsx.xvcvspbf16 on %vc and stores the <16 x i8> result
; through %resp with 16-byte alignment. %vqp and %vpp are unused.
; Both runs expect the same sequence: the conversion instruction followed by a
; single stxv at offset 0.
; Function Attrs: nofree nounwind writeonly
define dso_local void @test60(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test60:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvcvspbf16 vs0, v2
; CHECK-NEXT:    stxv vs0, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test60:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xvcvspbf16 vs0, v2
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> %vc)
  %1 = bitcast i8* %resp to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 16
  ret void
}

; Intrinsic exercised by @test60: one <16 x i8> operand in, one <16 x i8>
; value out.
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8>)

; test61: calls llvm.ppc.vsx.xvcvbf16spn on %vc and stores the <16 x i8> result
; through %resp with 16-byte alignment. %vqp and %vpp are unused.
; Both runs expect the same sequence: the conversion instruction followed by a
; single stxv at offset 0.
; Function Attrs: nofree nounwind writeonly
define dso_local void @test61(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
; CHECK-LABEL: test61:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvcvbf16spn vs0, v2
; CHECK-NEXT:    stxv vs0, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test61:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xvcvbf16spn vs0, v2
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %vc)
  %1 = bitcast i8* %resp to <16 x i8>*
  store <16 x i8> %0, <16 x i8>* %1, align 16
  ret void
}

; Intrinsic exercised by @test61: one <16 x i8> operand in, one <16 x i8>
; value out.
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8>)
