/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
4 
/* 16-byte vector of unsigned char; the type of the two vector operands
   passed to every MMA built-in in this file.  */
typedef unsigned char vec_t __attribute__ ((vector_size (16)));
6 
7 void
foo0(__vector_quad * dst,vec_t * vec)8 foo0 (__vector_quad *dst, vec_t *vec)
9 {
10   __vector_quad acc;
11   vec_t vec0 = vec[0];
12   vec_t vec1 = vec[1];
13 
14   __builtin_mma_xvi4ger8 (&acc, vec0, vec1);
15   __builtin_mma_xvi4ger8pp (&acc, vec0, vec1);
16   dst[0] = acc;
17 }
18 
19 void
foo1(__vector_quad * dst,vec_t * vec)20 foo1 (__vector_quad *dst, vec_t *vec)
21 {
22   __vector_quad acc;
23   vec_t vec0 = vec[0];
24   vec_t vec1 = vec[1];
25 
26   __builtin_mma_xvi8ger4 (&acc, vec0, vec1);
27   __builtin_mma_xvi8ger4pp (&acc, vec0, vec1);
28   __builtin_mma_xvi8ger4spp(&acc, vec0, vec1);
29   dst[1] = acc;
30 }
31 
32 void
foo2(__vector_quad * dst,vec_t * vec)33 foo2 (__vector_quad *dst, vec_t *vec)
34 {
35   __vector_quad acc;
36   vec_t vec0 = vec[0];
37   vec_t vec1 = vec[1];
38 
39   __builtin_mma_xvi16ger2 (&acc, vec0, vec1);
40   __builtin_mma_xvi16ger2pp (&acc, vec0, vec1);
41   dst[2] = acc;
42 }
43 
44 void
foo3(__vector_quad * dst,vec_t * vec)45 foo3 (__vector_quad *dst, vec_t *vec)
46 {
47   __vector_quad acc;
48   vec_t vec0 = vec[0];
49   vec_t vec1 = vec[1];
50 
51   __builtin_mma_xvi16ger2s (&acc, vec0, vec1);
52   __builtin_mma_xvi16ger2spp (&acc, vec0, vec1);
53   dst[3] = acc;
54 }
55 
56 void
foo4(__vector_quad * dst,vec_t * vec)57 foo4 (__vector_quad *dst, vec_t *vec)
58 {
59   __vector_quad acc;
60   vec_t vec0 = vec[0];
61   vec_t vec1 = vec[1];
62 
63   __builtin_mma_xvf16ger2 (&acc, vec0, vec1);
64   __builtin_mma_xvf16ger2pp (&acc, vec0, vec1);
65   __builtin_mma_xvf16ger2pn (&acc, vec0, vec1);
66   dst[4] = acc;
67 }
68 
69 void
foo4b(__vector_quad * dst,__vector_quad * src,vec_t * vec)70 foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
71 {
72   __vector_quad acc;
73   vec_t vec0 = vec[0];
74   vec_t vec1 = vec[1];
75 
76   acc = src[0];
77   __builtin_mma_xvf16ger2np (&acc, vec0, vec1);
78   __builtin_mma_xvf16ger2nn (&acc, vec0, vec1);
79   dst[4] = acc;
80 }
81 
82 void
foo5(__vector_quad * dst,vec_t * vec)83 foo5 (__vector_quad *dst, vec_t *vec)
84 {
85   __vector_quad acc;
86   vec_t vec0 = vec[0];
87   vec_t vec1 = vec[1];
88 
89   __builtin_mma_xvbf16ger2 (&acc, vec0, vec1);
90   __builtin_mma_xvbf16ger2pp (&acc, vec0, vec1);
91   __builtin_mma_xvbf16ger2pn (&acc, vec0, vec1);
92   dst[5] = acc;
93 }
94 
95 void
foo5b(__vector_quad * dst,__vector_quad * src,vec_t * vec)96 foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
97 {
98   __vector_quad acc;
99   vec_t vec0 = vec[0];
100   vec_t vec1 = vec[1];
101 
102   acc = src[0];
103   __builtin_mma_xvbf16ger2np (&acc, vec0, vec1);
104   __builtin_mma_xvbf16ger2nn (&acc, vec0, vec1);
105   dst[5] = acc;
106 }
107 
108 void
foo6(__vector_quad * dst,vec_t * vec)109 foo6 (__vector_quad *dst, vec_t *vec)
110 {
111   __vector_quad acc;
112   vec_t vec0 = vec[0];
113   vec_t vec1 = vec[1];
114 
115   __builtin_mma_xvf32ger (&acc, vec0, vec1);
116   __builtin_mma_xvf32gerpp (&acc, vec0, vec1);
117   __builtin_mma_xvf32gerpn (&acc, vec0, vec1);
118   dst[6] = acc;
119 }
120 
121 void
foo6b(__vector_quad * dst,__vector_quad * src,vec_t * vec)122 foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
123 {
124   __vector_quad acc;
125   vec_t vec0 = vec[0];
126   vec_t vec1 = vec[1];
127 
128   acc = src[0];
129   __builtin_mma_xvf32gernp (&acc, vec0, vec1);
130   __builtin_mma_xvf32gernn (&acc, vec0, vec1);
131   dst[6] = acc;
132 }
133 
134 void
foo7(__vector_quad * dst,vec_t * vec)135 foo7 (__vector_quad *dst, vec_t *vec)
136 {
137   __vector_quad acc;
138   vec_t vec0 = vec[0];
139   vec_t vec1 = vec[1];
140 
141   __builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255);
142   __builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255);
143   dst[7] = acc;
144 }
145 
146 void
foo8(__vector_quad * dst,vec_t * vec)147 foo8 (__vector_quad *dst, vec_t *vec)
148 {
149   __vector_quad acc;
150   vec_t vec0 = vec[0];
151   vec_t vec1 = vec[1];
152 
153   __builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15);
154   __builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15);
155   __builtin_mma_pmxvi8ger4spp(&acc, vec0, vec1, 15, 15, 15);
156   dst[8] = acc;
157 }
158 
159 void
foo9(__vector_quad * dst,vec_t * vec)160 foo9 (__vector_quad *dst, vec_t *vec)
161 {
162   __vector_quad acc;
163   vec_t vec0 = vec[0];
164   vec_t vec1 = vec[1];
165 
166   __builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3);
167   __builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3);
168   dst[9] = acc;
169 }
170 
171 void
foo10(__vector_quad * dst,vec_t * vec)172 foo10 (__vector_quad *dst, vec_t *vec)
173 {
174   __vector_quad acc;
175   vec_t vec0 = vec[0];
176   vec_t vec1 = vec[1];
177 
178   __builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3);
179   __builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3);
180   dst[10] = acc;
181 }
182 
183 void
foo11(__vector_quad * dst,vec_t * vec)184 foo11 (__vector_quad *dst, vec_t *vec)
185 {
186   __vector_quad acc;
187   vec_t vec0 = vec[0];
188   vec_t vec1 = vec[1];
189 
190   __builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3);
191   __builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
192   __builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
193   dst[11] = acc;
194 }
195 
196 void
foo11b(__vector_quad * dst,__vector_quad * src,vec_t * vec)197 foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
198 {
199   __vector_quad acc;
200   vec_t vec0 = vec[0];
201   vec_t vec1 = vec[1];
202 
203   acc = src[0];
204   __builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3);
205   __builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
206   dst[11] = acc;
207 }
208 
209 void
foo12(__vector_quad * dst,vec_t * vec)210 foo12 (__vector_quad *dst, vec_t *vec)
211 {
212   __vector_quad acc;
213   vec_t vec0 = vec[0];
214   vec_t vec1 = vec[1];
215 
216   __builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3);
217   __builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
218   __builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
219   dst[12] = acc;
220 }
221 
222 void
foo12b(__vector_quad * dst,__vector_quad * src,vec_t * vec)223 foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
224 {
225   __vector_quad acc;
226   vec_t vec0 = vec[0];
227   vec_t vec1 = vec[1];
228 
229   acc = src[0];
230   __builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3);
231   __builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
232   dst[12] = acc;
233 }
234 
235 void
foo13(__vector_quad * dst,vec_t * vec)236 foo13 (__vector_quad *dst, vec_t *vec)
237 {
238   __vector_quad acc;
239   vec_t vec0 = vec[0];
240   vec_t vec1 = vec[1];
241 
242   __builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15);
243   __builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15);
244   __builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15);
245   dst[13] = acc;
246 }
247 
248 void
foo13b(__vector_quad * dst,__vector_quad * src,vec_t * vec)249 foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
250 {
251   __vector_quad acc;
252   vec_t vec0 = vec[0];
253   vec_t vec1 = vec[1];
254 
255   acc = src[0];
256   __builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15);
257   __builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15);
258   dst[13] = acc;
259 }
260 
/* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */