/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mdejagnu-cpu=power8 -O3 -ffast-math" } */

/* Taken from the SPEC 2006 milc benchmark.  Ultimately, GCC wants to generate
   a DF splat from offsettable memory.  The register allocator decided it was
   better to do the load in the GPR registers and do a move direct, rather than
   doing a load in the VSX register sets.  */

/* Complex number stored as a (real, imag) pair of doubles.  */
typedef struct
{
  double real;
  double imag;
} complex;

/* Second complex type with the same two-double layout as `complex'.  */
typedef struct
{
  double real;
  double imag;
} double_complex;

/* Helpers on `complex'.  Prototypes only: none of these is defined or
   called in this file.  */
complex cmplx (double x, double y);
complex cadd (complex * a, complex * b);
complex cmul (complex * a, complex * b);
complex csub (complex * a, complex * b);
complex cdiv (complex * a, complex * b);
complex conjg (complex * a);
complex ce_itheta (double theta);

/* The same family of helpers on `double_complex'.  Prototypes only.  */
double_complex dcmplx (double x, double y);
double_complex dcadd (double_complex * a, double_complex * b);
double_complex dcmul (double_complex * a, double_complex * b);
double_complex dcsub (double_complex * a, double_complex * b);
double_complex dcdiv (double_complex * a, double_complex * b);
double_complex dconjg (double_complex * a);
double_complex dcexp (double_complex * a);
double_complex dclog (double_complex * a);
double_complex dcsqrt (double_complex * z);
double_complex dce_itheta (double theta);

/* State record passed to myrand and the other *_prn consumers below.
   NOTE(review): field names suggest a pseudo-random generator state; the
   fields are never read or written in this file.  */
typedef struct
{
  unsigned long r0, r1, r2, r3, r4, r5, r6;
  unsigned long multiplier, addend, ic_state;
  double scale;
} double_prn;

double myrand (double_prn * prn_pt);

/* 3x3 array of complex entries, indexed e[row][column].  */
typedef struct
{
  complex e[3][3];
} su3_matrix;

/* Vector of three complex components.  */
typedef struct
{
  complex c[3];
} su3_vector;

/* Three complex entries and three double entries plus one spare double.
   NOTE(review): presumably a packed anti-Hermitian 3x3 matrix, going by the
   type and field names; not used by the code under test.  */
typedef struct
{
  complex m01, m02, m12;
  double m00im, m11im, m22im;
  double space;
} anti_hermitmat;

/* 2x2 array of complex entries.  */
typedef struct
{
  complex e[2][2];
} su2_matrix;
/* Four su3_vectors.  */
typedef struct
{
  su3_vector d[4];
} wilson_vector;
/* Two su3_vectors.  */
typedef struct
{
  su3_vector h[2];
} half_wilson_vector;
/* Three wilson_vectors.  */
typedef struct
{
  wilson_vector c[3];
} color_wilson_vector;
/* Four wilson_vectors.  */
typedef struct
{
  wilson_vector d[4];
} spin_wilson_vector;
/* Four color_wilson_vectors.  */
typedef struct
{
  color_wilson_vector d[4];
} wilson_matrix;
/* Three spin_wilson_vectors.  */
typedef struct
{
  spin_wilson_vector c[3];
} wilson_propagator;

/* su3_matrix routines.  Prototypes only: not defined or called here.  */
void mult_su3_nn (su3_matrix * a, su3_matrix * b, su3_matrix * c);
void mult_su3_na (su3_matrix * a, su3_matrix * b, su3_matrix * c);
void mult_su3_an (su3_matrix * a, su3_matrix * b, su3_matrix * c);
double realtrace_su3 (su3_matrix * a, su3_matrix * b);
complex trace_su3 (su3_matrix * a);
complex complextrace_su3 (su3_matrix * a, su3_matrix * b);
complex det_su3 (su3_matrix * a);
void add_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
void sub_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
void scalar_mult_su3_matrix (su3_matrix * src, double scalar,
			     su3_matrix * dest);
void scalar_mult_add_su3_matrix (su3_matrix * src1, su3_matrix * src2,
				 double scalar, su3_matrix * dest);
void scalar_mult_sub_su3_matrix (su3_matrix * src1, su3_matrix * src2,
				 double scalar, su3_matrix * dest);
void c_scalar_mult_su3mat (su3_matrix * src, complex * scalar,
			   su3_matrix * dest);
void c_scalar_mult_add_su3mat (su3_matrix * src1, su3_matrix * src2,
			       complex * scalar, su3_matrix * dest);
void c_scalar_mult_sub_su3mat (su3_matrix * src1, su3_matrix * src2,
			       complex * scalar, su3_matrix * dest);
void su3_adjoint (su3_matrix * a, su3_matrix * b);
void make_anti_hermitian (su3_matrix * m3, anti_hermitmat * ah3);
void random_anti_hermitian (anti_hermitmat * mat_antihermit,
			    double_prn * prn_pt);
void uncompress_anti_hermitian (anti_hermitmat * mat_anti, su3_matrix * mat);
void compress_anti_hermitian (su3_matrix * mat, anti_hermitmat * mat_anti);
void clear_su3mat (su3_matrix * dest);
void su3mat_copy (su3_matrix * a, su3_matrix * b);
void dumpmat (su3_matrix * m);

/* su3_vector routines.  Prototypes only.  */
void su3_projector (su3_vector * a, su3_vector * b, su3_matrix * c);
complex su3_dot (su3_vector * a, su3_vector * b);
double su3_rdot (su3_vector * a, su3_vector * b);
double magsq_su3vec (su3_vector * a);
void su3vec_copy (su3_vector * a, su3_vector * b);
void dumpvec (su3_vector * v);
void clearvec (su3_vector * v);

/* Matrix-times-vector routines.  Only mult_adj_su3_mat_vec is defined in
   this file; the rest are prototypes only.  */
void mult_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
void mult_su3_mat_vec_sum (su3_matrix * a, su3_vector * b, su3_vector * c);
void mult_su3_mat_vec_sum_4dir (su3_matrix * a, su3_vector * b0,
				su3_vector * b1, su3_vector * b2,
				su3_vector * b3, su3_vector * c);
void mult_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b, su3_vector * c);
void mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
void mult_adj_su3_mat_vec_4dir (su3_matrix * a, su3_vector * b,
				su3_vector * c);
void mult_adj_su3_mat_4vec (su3_matrix * mat, su3_vector * src,
			    su3_vector * dest0, su3_vector * dest1,
			    su3_vector * dest2, su3_vector * dest3);
void mult_adj_su3_mat_vec_sum (su3_matrix * a, su3_vector * b,
			       su3_vector * c);
void mult_adj_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b,
				su3_vector * c);

/* Vector add/subtract routines.  Prototypes only.  */
void add_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
void sub_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
void sub_four_su3_vecs (su3_vector * a, su3_vector * b1, su3_vector * b2,
			su3_vector * b3, su3_vector * b4);

/* Scalar-multiply routines on vectors and Wilson vectors.  Prototypes
   only.  */
void scalar_mult_su3_vector (su3_vector * src, double scalar,
			     su3_vector * dest);
void scalar_mult_add_su3_vector (su3_vector * src1, su3_vector * src2,
				 double scalar, su3_vector * dest);
void scalar_mult_sum_su3_vector (su3_vector * src1, su3_vector * src2,
				 double scalar);
void scalar_mult_sub_su3_vector (su3_vector * src1, su3_vector * src2,
				 double scalar, su3_vector * dest);
void scalar_mult_wvec (wilson_vector * src, double s, wilson_vector * dest);
void scalar_mult_hwvec (half_wilson_vector * src, double s,
			half_wilson_vector * dest);
void scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
			   double scalar, wilson_vector * dest);
void scalar_mult_addtm_wvec (wilson_vector * src1, wilson_vector * src2,
			     double scalar, wilson_vector * dest);
void c_scalar_mult_wvec (wilson_vector * src1, complex * phase,
			 wilson_vector * dest);
void c_scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
			     complex * phase, wilson_vector * dest);
void c_scalar_mult_add_wvec2 (wilson_vector * src1, wilson_vector * src2,
			      complex s, wilson_vector * dest);
void c_scalar_mult_su3vec (su3_vector * src, complex * phase,
			   su3_vector * dest);
void c_scalar_mult_add_su3vec (su3_vector * v1, complex * phase,
			       su3_vector * v2);
void c_scalar_mult_sub_su3vec (su3_vector * v1, complex * phase,
			       su3_vector * v2);

/* su2_matrix routines.  Prototypes only.  */
void left_su2_hit_n (su2_matrix * u, int p, int q, su3_matrix * link);
void right_su2_hit_a (su2_matrix * u, int p, int q, su3_matrix * link);
void dumpsu2 (su2_matrix * u);
void mult_su2_mat_vec_elem_n (su2_matrix * u, complex * x0, complex * x1);
void mult_su2_mat_vec_elem_a (su2_matrix * u, complex * x0, complex * x1);

/* Matrix times (half) Wilson vector routines.  Prototypes only.  */
void mult_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
			  wilson_vector * dest);
void mult_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
			 half_wilson_vector * dest);
void mult_adj_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
			      wilson_vector * dest);
void mult_adj_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
			     half_wilson_vector * dest);

/* Wilson vector arithmetic.  Prototypes only.  */
void add_wilson_vector (wilson_vector * src1, wilson_vector * src2,
			wilson_vector * dest);
void sub_wilson_vector (wilson_vector * src1, wilson_vector * src2,
			wilson_vector * dest);
double magsq_wvec (wilson_vector * src);
complex wvec_dot (wilson_vector * src1, wilson_vector * src2);
complex wvec2_dot (wilson_vector * src1, wilson_vector * src2);
double wvec_rdot (wilson_vector * a, wilson_vector * b);

/* Remaining Wilson vector/matrix routines.  Prototypes only.  */
void wp_shrink (wilson_vector * src, half_wilson_vector * dest,
		int dir, int sign);
void wp_shrink_4dir (wilson_vector * a, half_wilson_vector * b1,
		     half_wilson_vector * b2, half_wilson_vector * b3,
		     half_wilson_vector * b4, int sign);
void wp_grow (half_wilson_vector * src, wilson_vector * dest,
	      int dir, int sign);
void wp_grow_add (half_wilson_vector * src, wilson_vector * dest,
		  int dir, int sign);
void grow_add_four_wvecs (wilson_vector * a, half_wilson_vector * b1,
			  half_wilson_vector * b2, half_wilson_vector * b3,
			  half_wilson_vector * b4, int sign, int sum);
void mult_by_gamma (wilson_vector * src, wilson_vector * dest, int dir);
void mult_by_gamma_left (wilson_matrix * src, wilson_matrix * dest, int dir);
void mult_by_gamma_right (wilson_matrix * src, wilson_matrix * dest, int dir);
void mult_swv_by_gamma_l (spin_wilson_vector * src, spin_wilson_vector * dest,
			  int dir);
void mult_swv_by_gamma_r (spin_wilson_vector * src, spin_wilson_vector * dest,
			  int dir);
void su3_projector_w (wilson_vector * a, wilson_vector * b, su3_matrix * c);
void clear_wvec (wilson_vector * dest);
void copy_wvec (wilson_vector * src, wilson_vector * dest);
void dump_wilson_vec (wilson_vector * src);

/* Miscellaneous prototypes and integer typedefs; unused in this file.  */
double gaussian_rand_no (double_prn * prn_pt);
typedef int int32type;
typedef unsigned int u_int32type;
void byterevn (int32type w[], int n);

/* Multiply the adjoint (conjugate transpose) of A by the vector B and store
   the result in C:

     c->c[i] = sum over j of conj (a->e[j][i]) * b->c[j],   i, j in 0..2.

   A and B are only read; all three components of C are overwritten.  B is
   re-read on every iteration after earlier components of C have been stored,
   so C is expected not to overlap B.

   This is the code under test.  The statement order, the scalar temporaries
   and the `register' declaration are kept exactly as in the benchmark source,
   because the dg-final directive at the end of the file checks the generated
   assembly; do not restructure the arithmetic.  */
void
mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c)
{
  int i;
  register double t, ar, ai, br, bi, cr, ci;
  for (i = 0; i < 3; i++)
    {
      /* j = 0 term: conj (a[0][i]) * b[0] initialises the accumulators.  */
      ar = a->e[0][i].real;
      ai = a->e[0][i].imag;

      br = b->c[0].real;
      bi = b->c[0].imag;

      /* Real part: ar*br + ai*bi.  */
      cr = ar * br;
      t = ai * bi;
      cr += t;

      /* Imaginary part: ar*bi - ai*br (sign flipped by the conjugate).  */
      ci = ar * bi;
      t = ai * br;
      ci -= t;

      /* j = 1 term, accumulated into cr/ci.  */
      ar = a->e[1][i].real;
      ai = a->e[1][i].imag;

      br = b->c[1].real;
      bi = b->c[1].imag;

      t = ar * br;
      cr += t;
      t = ai * bi;
      cr += t;

      t = ar * bi;
      ci += t;
      t = ai * br;
      ci -= t;

      /* j = 2 term, accumulated into cr/ci.  */
      ar = a->e[2][i].real;
      ai = a->e[2][i].imag;

      br = b->c[2].real;
      bi = b->c[2].imag;

      t = ar * br;
      cr += t;
      t = ai * bi;
      cr += t;

      t = ar * bi;
      ci += t;
      t = ai * br;
      ci -= t;

      c->c[i].real = cr;
      c->c[i].imag = ci;
    }
}

/* { dg-final { scan-assembler-not "mtvsrd" } } */
