/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
4
/* 16-byte vector of unsigned char; the test functions in this file
   load two of these from memory and pass them as the vector operands
   of the MMA built-ins.  */
typedef unsigned char vec_t __attribute__((vector_size(16)));
6
7 void
foo0(__vector_quad * dst,vec_t * vec)8 foo0 (__vector_quad *dst, vec_t *vec)
9 {
10 __vector_quad acc;
11 vec_t vec0 = vec[0];
12 vec_t vec1 = vec[1];
13
14 __builtin_mma_xvi4ger8 (&acc, vec0, vec1);
15 __builtin_mma_xvi4ger8pp (&acc, vec0, vec1);
16 dst[0] = acc;
17 }
18
19 void
foo1(__vector_quad * dst,vec_t * vec)20 foo1 (__vector_quad *dst, vec_t *vec)
21 {
22 __vector_quad acc;
23 vec_t vec0 = vec[0];
24 vec_t vec1 = vec[1];
25
26 __builtin_mma_xvi8ger4 (&acc, vec0, vec1);
27 __builtin_mma_xvi8ger4pp (&acc, vec0, vec1);
28 __builtin_mma_xvi8ger4spp(&acc, vec0, vec1);
29 dst[1] = acc;
30 }
31
32 void
foo2(__vector_quad * dst,vec_t * vec)33 foo2 (__vector_quad *dst, vec_t *vec)
34 {
35 __vector_quad acc;
36 vec_t vec0 = vec[0];
37 vec_t vec1 = vec[1];
38
39 __builtin_mma_xvi16ger2 (&acc, vec0, vec1);
40 __builtin_mma_xvi16ger2pp (&acc, vec0, vec1);
41 dst[2] = acc;
42 }
43
44 void
foo3(__vector_quad * dst,vec_t * vec)45 foo3 (__vector_quad *dst, vec_t *vec)
46 {
47 __vector_quad acc;
48 vec_t vec0 = vec[0];
49 vec_t vec1 = vec[1];
50
51 __builtin_mma_xvi16ger2s (&acc, vec0, vec1);
52 __builtin_mma_xvi16ger2spp (&acc, vec0, vec1);
53 dst[3] = acc;
54 }
55
56 void
foo4(__vector_quad * dst,vec_t * vec)57 foo4 (__vector_quad *dst, vec_t *vec)
58 {
59 __vector_quad acc;
60 vec_t vec0 = vec[0];
61 vec_t vec1 = vec[1];
62
63 __builtin_mma_xvf16ger2 (&acc, vec0, vec1);
64 __builtin_mma_xvf16ger2pp (&acc, vec0, vec1);
65 __builtin_mma_xvf16ger2pn (&acc, vec0, vec1);
66 dst[4] = acc;
67 }
68
69 void
foo4b(__vector_quad * dst,__vector_quad * src,vec_t * vec)70 foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
71 {
72 __vector_quad acc;
73 vec_t vec0 = vec[0];
74 vec_t vec1 = vec[1];
75
76 acc = src[0];
77 __builtin_mma_xvf16ger2np (&acc, vec0, vec1);
78 __builtin_mma_xvf16ger2nn (&acc, vec0, vec1);
79 dst[4] = acc;
80 }
81
82 void
foo5(__vector_quad * dst,vec_t * vec)83 foo5 (__vector_quad *dst, vec_t *vec)
84 {
85 __vector_quad acc;
86 vec_t vec0 = vec[0];
87 vec_t vec1 = vec[1];
88
89 __builtin_mma_xvbf16ger2 (&acc, vec0, vec1);
90 __builtin_mma_xvbf16ger2pp (&acc, vec0, vec1);
91 __builtin_mma_xvbf16ger2pn (&acc, vec0, vec1);
92 dst[5] = acc;
93 }
94
95 void
foo5b(__vector_quad * dst,__vector_quad * src,vec_t * vec)96 foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
97 {
98 __vector_quad acc;
99 vec_t vec0 = vec[0];
100 vec_t vec1 = vec[1];
101
102 acc = src[0];
103 __builtin_mma_xvbf16ger2np (&acc, vec0, vec1);
104 __builtin_mma_xvbf16ger2nn (&acc, vec0, vec1);
105 dst[5] = acc;
106 }
107
108 void
foo6(__vector_quad * dst,vec_t * vec)109 foo6 (__vector_quad *dst, vec_t *vec)
110 {
111 __vector_quad acc;
112 vec_t vec0 = vec[0];
113 vec_t vec1 = vec[1];
114
115 __builtin_mma_xvf32ger (&acc, vec0, vec1);
116 __builtin_mma_xvf32gerpp (&acc, vec0, vec1);
117 __builtin_mma_xvf32gerpn (&acc, vec0, vec1);
118 dst[6] = acc;
119 }
120
121 void
foo6b(__vector_quad * dst,__vector_quad * src,vec_t * vec)122 foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
123 {
124 __vector_quad acc;
125 vec_t vec0 = vec[0];
126 vec_t vec1 = vec[1];
127
128 acc = src[0];
129 __builtin_mma_xvf32gernp (&acc, vec0, vec1);
130 __builtin_mma_xvf32gernn (&acc, vec0, vec1);
131 dst[6] = acc;
132 }
133
134 void
foo7(__vector_quad * dst,vec_t * vec)135 foo7 (__vector_quad *dst, vec_t *vec)
136 {
137 __vector_quad acc;
138 vec_t vec0 = vec[0];
139 vec_t vec1 = vec[1];
140
141 __builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255);
142 __builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255);
143 dst[7] = acc;
144 }
145
146 void
foo8(__vector_quad * dst,vec_t * vec)147 foo8 (__vector_quad *dst, vec_t *vec)
148 {
149 __vector_quad acc;
150 vec_t vec0 = vec[0];
151 vec_t vec1 = vec[1];
152
153 __builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15);
154 __builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15);
155 __builtin_mma_pmxvi8ger4spp(&acc, vec0, vec1, 15, 15, 15);
156 dst[8] = acc;
157 }
158
159 void
foo9(__vector_quad * dst,vec_t * vec)160 foo9 (__vector_quad *dst, vec_t *vec)
161 {
162 __vector_quad acc;
163 vec_t vec0 = vec[0];
164 vec_t vec1 = vec[1];
165
166 __builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3);
167 __builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3);
168 dst[9] = acc;
169 }
170
171 void
foo10(__vector_quad * dst,vec_t * vec)172 foo10 (__vector_quad *dst, vec_t *vec)
173 {
174 __vector_quad acc;
175 vec_t vec0 = vec[0];
176 vec_t vec1 = vec[1];
177
178 __builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3);
179 __builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3);
180 dst[10] = acc;
181 }
182
183 void
foo11(__vector_quad * dst,vec_t * vec)184 foo11 (__vector_quad *dst, vec_t *vec)
185 {
186 __vector_quad acc;
187 vec_t vec0 = vec[0];
188 vec_t vec1 = vec[1];
189
190 __builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3);
191 __builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
192 __builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
193 dst[11] = acc;
194 }
195
196 void
foo11b(__vector_quad * dst,__vector_quad * src,vec_t * vec)197 foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
198 {
199 __vector_quad acc;
200 vec_t vec0 = vec[0];
201 vec_t vec1 = vec[1];
202
203 acc = src[0];
204 __builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3);
205 __builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
206 dst[11] = acc;
207 }
208
209 void
foo12(__vector_quad * dst,vec_t * vec)210 foo12 (__vector_quad *dst, vec_t *vec)
211 {
212 __vector_quad acc;
213 vec_t vec0 = vec[0];
214 vec_t vec1 = vec[1];
215
216 __builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3);
217 __builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
218 __builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
219 dst[12] = acc;
220 }
221
222 void
foo12b(__vector_quad * dst,__vector_quad * src,vec_t * vec)223 foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
224 {
225 __vector_quad acc;
226 vec_t vec0 = vec[0];
227 vec_t vec1 = vec[1];
228
229 acc = src[0];
230 __builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3);
231 __builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
232 dst[12] = acc;
233 }
234
235 void
foo13(__vector_quad * dst,vec_t * vec)236 foo13 (__vector_quad *dst, vec_t *vec)
237 {
238 __vector_quad acc;
239 vec_t vec0 = vec[0];
240 vec_t vec1 = vec[1];
241
242 __builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15);
243 __builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15);
244 __builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15);
245 dst[13] = acc;
246 }
247
248 void
foo13b(__vector_quad * dst,__vector_quad * src,vec_t * vec)249 foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
250 {
251 __vector_quad acc;
252 vec_t vec0 = vec[0];
253 vec_t vec1 = vec[1];
254
255 acc = src[0];
256 __builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15);
257 __builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15);
258 dst[13] = acc;
259 }
260
/* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */
314