/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-options "-mdejagnu-cpu=power8 -O3" } */
/* { dg-final { scan-assembler "lxvd2x" } } */
/* { dg-final { scan-assembler "stxvd2x" } } */
/* { dg-final { scan-assembler "xxspltw" } } */

/* Currently the analyze_swaps phase cannot optimize this loop because
   of the presence of an UNSPEC_VSX_CVDPSPN.  At such time as this is
   handled, we need to add a 'scan-assembler-not "xxpermdi"' directive to
   this test.  */
11 #include <altivec.h>
12 void abort();
13
14 #define N 4096
15 #define M 10000000
16 vector float ca[N][4] = {0};
17 vector float cb[N][4] = {0};
18 vector float cc[N][4] = {0};
19
/* Loop under test.  For every i in [0, N):

   Row 0 of cc[i] is built as a chain: it starts as
   vec_splats (cb[i][0][0]) * ca[i][0], and each of the next three
   statements replaces it with
   vec_madd (cc[i][0], vec_splats (cb[i][0][k]), ca[i][k]) for k = 1, 2, 3.

   Rows 1-3 start the same way from their own row of cb[i], but their
   three vec_madd statements also take cc[i][0] as the first operand, so
   each statement simply overwrites the previous value of that row.

   The function is marked noinline so that it is not inlined into main.

   NOTE(review): using cc[i][0] rather than cc[i][r] in rows 1-3 looks
   like it may be unintentional, but the statements are kept exactly as
   they are: this is a compile-only test whose scan-assembler directives
   were presumably written against this exact loop -- confirm before
   changing any operand or statement order.  */
__attribute__((noinline)) void foo ()
{
  int i;
  for (i = 0; i < N; i++) {
    cc[i][0] = vec_mul(vec_splats(cb[i][0][0]), ca[i][0]);
    cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][1]), ca[i][1]);
    cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][2]), ca[i][2]);
    cc[i][0] = vec_madd(cc[i][0],vec_splats(cb[i][0][3]), ca[i][3]);

    cc[i][1] = vec_mul(vec_splats(cb[i][1][0]), ca[i][0]);
    cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][1]), ca[i][1]);
    cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][2]), ca[i][2]);
    cc[i][1] = vec_madd(cc[i][0],vec_splats(cb[i][1][3]), ca[i][3]);

    cc[i][2] = vec_mul(vec_splats(cb[i][2][0]), ca[i][0]);
    cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][1]), ca[i][1]);
    cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][2]), ca[i][2]);
    cc[i][2] = vec_madd(cc[i][0],vec_splats(cb[i][2][3]), ca[i][3]);

    cc[i][3] = vec_mul(vec_splats(cb[i][3][0]), ca[i][0]);
    cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][1]), ca[i][1]);
    cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][2]), ca[i][2]);
    cc[i][3] = vec_madd(cc[i][0],vec_splats(cb[i][3][3]), ca[i][3]);
  }
}

/* Entry point: calls foo once and returns 0.  */
int main ()
{
  foo ();
  return 0;
}