/* { dg-do compile { target { powerpc64le-*-* } } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mdejagnu-cpu=power8 -O3 " } */

/* Previous versions of this test required that the assembler output
   not contain xxpermdi or xxswapd.  However, with the more
   sophisticated code generation used today, xxpermdi (aka xxswapd)
   may now show up without being part of an lxvd2x or stxvd2x
   sequence.  */

11 #include <altivec.h>
12 
13 extern void abort (void);
14 
15 vector float x;
16 const vector float y = { 0.0F, 0.1F, 0.2F, 0.3F };
17 vector float z;
18 
/* Test load from a constant global: return y by value.  */
vector float
foo (void)
{
  return y;			/* Remove 1 swap and use lvx.  */
}

/* Test a global-to-global copy followed by a load: copy y into x and
   return the new value of x.  As a side effect x holds y's elements
   after the call.  */
vector float
foo1 (void)
{
  x = y;			/* Remove 2 redundant swaps here.  */
  return x;			/* Remove 1 swap and use lvx.  */
}

/* Copy the global x into the vector that VP points to.  Marked
   noinline so that a caller passing the address of a local cannot
   have the copy folded away into a register move.  */
void __attribute__ ((noinline))
fill_local (vector float *vp)
{
  *vp = x;			/* Remove 2 redundant swaps here.  */
}

/* Test aligned load from local.  Returns the current value of the
   global x, routed through a local vector.  */
vector float
foo2 (void)
{
  vector float v;

  /* Need to be clever here because v will normally reside in a
     register rather than memory.  Handing its address to the noinline
     fill_local is what is meant to give it a memory home.  */
  fill_local (&v);
  return v;			/* Remove 1 swap and use lvx.  */
}


/* Test aligned load from pointer.  Returns the vector that ARG points
   to.  */
vector float
foo3 (vector float *arg)
{
  return *arg;			/* Remove 1 swap and use lvx.  */
}

/* In this structure, the compiler should insert padding after
   a_field to ensure that a_vector is properly aligned.  */
struct bar {
  short a_field;
  vector float a_vector;
};

/* Test load from a vector member of a structure reached through a
   pointer.  Returns BP->a_vector.  */
vector float
foo4 (struct bar *bp)
{
  return bp->a_vector;		/* Remove 1 swap and use lvx.  */
}

/* Test aligned store to global.  Stores ARG into the global x.  */
void
baz (vector float arg)
{
  x = arg;			/* Remove 1 swap and use stvx.  */
}

/* Copy the vector that ARG points to into the global x.  Marked
   noinline so that a caller passing the address of a local has to
   keep that local in memory.  */
void __attribute__ ((noinline))
copy_local (vector float *arg)
{
  x = *arg;			/* Remove 2 redundant swaps.  */
}


/* Test aligned store to local.  ARG is stored into a local vector,
   which copy_local then copies into the global x, so x holds ARG's
   elements after the call.  */
void
baz1 (vector float arg)
{
  vector float v;

  /* Need cleverness, because v will normally reside in a register
     rather than memory.  */
  v = arg;			/* Aligned store to local: remove 1
				   swap and use stvx.  */
  copy_local (&v);
}

/* Test aligned store to pointer.  Stores ARG2 into the vector that
   ARG1 points to.  */
void
baz2 (vector float *arg1, vector float arg2)
{
  /* Assume arg2 resides in register.  */
  *arg1 = arg2;			/* Remove 1 swap and use stvx.  */
}

/* Test store to a vector member of a structure reached through a
   pointer.  Stores V into BP->a_vector.  */
void
baz3 (struct bar *bp, vector float v)
{
  /* Assume v resides in register.  */
  bp->a_vector = v;		/* Remove 1 swap and use stvx.  */
}

/* Call each load and store helper once and abort if the vector it
   moved does not hold the expected elements.  Returns 0 when every
   check passes.

   The checks are order-dependent: foo1 copies y into x, and the foo2,
   foo3 and foo4 checks then read that value back through x.  ARGC and
   ARGV are unused.  */
int
main (int argc, char *argv[])
{
  vector float fetched_value = foo ();
  if (fetched_value[0] != 0.0F || fetched_value[3] != 0.3F)
    abort ();

  fetched_value = foo1 ();
  if (fetched_value[1] != 0.1F || fetched_value[2] != 0.2F)
    abort ();

  /* x still holds y's elements here, courtesy of foo1.  */
  fetched_value = foo2 ();
  if (fetched_value[2] != 0.2F || fetched_value[1] != 0.1F)
    abort ();

  fetched_value = foo3 (&x);
  if (fetched_value[3] != 0.3F || fetched_value[0] != 0.0F)
    abort ();

  struct bar a_struct;
  a_struct.a_vector = x;	/* Remove 2 redundant swaps.  */
  fetched_value = foo4 (&a_struct);
  if (fetched_value[2] != 0.2F || fetched_value[3] != 0.3F)
    abort ();

  z[0] = 0.7F;
  z[1] = 0.6F;
  z[2] = 0.5F;
  z[3] = 0.4F;

  baz (z);
  if (x[0] != 0.7F || x[3] != 0.4F)
    abort ();

  vector float source = { 0.8F, 0.7F, 0.6F, 0.5F };

  /* baz1 leaves a copy of source in x, so element 2 is 0.6F and
     element 3 is 0.5F.  */
  baz1 (source);
  if (x[3] != 0.5F || x[2] != 0.6F)
    abort ();

  vector float dest;
  baz2 (&dest, source);
  if (dest[0] != 0.8F || dest[1] != 0.7F)
    abort ();

  baz3 (&a_struct, source);
  if (a_struct.a_vector[3] != 0.5F || a_struct.a_vector[0] != 0.8F)
    abort ();

  return 0;
}