/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mdejagnu-cpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler "xxspltw" } } */

/* Currently the analyze_swaps phase cannot optimize this loop because
   of the presence of an UNSPEC_VSX_CVDPSPN.  At such time as this is
   handled, we need to add a 'scan-assembler-not "xxpermdi"' directive to
   this test.  */
11 #include <altivec.h>
12 void abort();
13 
14 #define N 4096
15 #define M 10000000
16 vector float ca[N][4] = {0};
17 vector float cb[N][4] = {0};
18 vector float cc[N][4] = {0};
foo()20 __attribute__((noinline)) void foo ()
21 {
22   int i;
23   for (i = 0; i < N; i++) {
24     cc[i][0] = vec_mul(vec_splats(cb[i][0][0]), ca[i][0]);
25     cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][1]), ca[i][1]);
26     cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][2]), ca[i][2]);
27     cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][3]), ca[i][3]);
28 
29     cc[i][1] = vec_mul(vec_splats(cb[i][1][0]), ca[i][0]);
30     cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][1]), ca[i][1]);
31     cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][2]), ca[i][2]);
32     cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][3]), ca[i][3]);
33 
34     cc[i][2] = vec_mul(vec_splats(cb[i][2][0]), ca[i][0]);
35     cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][1]), ca[i][1]);
36     cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][2]), ca[i][2]);
37     cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][3]), ca[i][3]);
38 
39     cc[i][3] = vec_mul(vec_splats(cb[i][3][0]), ca[i][0]);
40     cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][1]), ca[i][1]);
41     cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][2]), ca[i][2]);
42     cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][3]), ca[i][3]);
43   }
44 }
/* Entry point: calls foo once and returns 0.  */
int main (void)
{
  foo ();
  return 0;
}