1 /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
2 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
3 /* { dg-require-effective-target powerpc_p8vector_ok } */
4 /* { dg-options "-mdejagnu-cpu=power8 -O3 -ffast-math" } */
5
/* Taken from the SPEC 2006 milc benchmark.  Ultimately, GCC wants to generate
   a DF splat from offsettable memory.  The register allocator decided it was
   better to do the load in the GPR registers and then do a direct move,
   rather than doing the load in the VSX register set.  */
10
/* Complex number stored as two doubles: real part first, imaginary part
   second.  */
typedef struct
{
  double real;
  double imag;
} complex;
16
/* Second complex type with the same layout as `complex' (two doubles, real
   part first).  It is a distinct type, so the two are not interchangeable
   without a cast or copy.  */
typedef struct
{
  double real;
  double imag;
} double_complex;
22
23 complex cmplx (double x, double y);
24 complex cadd (complex * a, complex * b);
25 complex cmul (complex * a, complex * b);
26 complex csub (complex * a, complex * b);
27 complex cdiv (complex * a, complex * b);
28 complex conjg (complex * a);
29 complex ce_itheta (double theta);
30
31 double_complex dcmplx (double x, double y);
32 double_complex dcadd (double_complex * a, double_complex * b);
33 double_complex dcmul (double_complex * a, double_complex * b);
34 double_complex dcsub (double_complex * a, double_complex * b);
35 double_complex dcdiv (double_complex * a, double_complex * b);
36 double_complex dconjg (double_complex * a);
37 double_complex dcexp (double_complex * a);
38 double_complex dclog (double_complex * a);
39 double_complex dcsqrt (double_complex * z);
40 double_complex dce_itheta (double theta);
41
/* State record passed to the random-number helpers: seven unsigned long
   words r0..r6, three further unsigned long fields and a double scale.
   NOTE(review): presumably a shift-register/LCG generator state, going by
   the field names -- confirm against the milc sources.  */
typedef struct
{
  unsigned long r0, r1, r2, r3, r4, r5, r6;
  unsigned long multiplier, addend, ic_state;
  double scale;
} double_prn;

/* Declaration only; takes a pointer to the generator state above.  */
double myrand (double_prn * prn_pt);
50
51 typedef struct
52 {
53 complex e[3][3];
54 } su3_matrix;
55
56 typedef struct
57 {
58 complex c[3];
59 } su3_vector;
60
61 typedef struct
62 {
63 complex m01, m02, m12;
64 double m00im, m11im, m22im;
65 double space;
66 } anti_hermitmat;
67
68 typedef struct
69 {
70 complex e[2][2];
71 } su2_matrix;
72 typedef struct
73 {
74 su3_vector d[4];
75 } wilson_vector;
76 typedef struct
77 {
78 su3_vector h[2];
79 } half_wilson_vector;
80 typedef struct
81 {
82 wilson_vector c[3];
83 } color_wilson_vector;
84 typedef struct
85 {
86 wilson_vector d[4];
87 } spin_wilson_vector;
88 typedef struct
89 {
90 color_wilson_vector d[4];
91 } wilson_matrix;
92 typedef struct
93 {
94 spin_wilson_vector c[3];
95 } wilson_propagator;
96
97 void mult_su3_nn (su3_matrix * a, su3_matrix * b, su3_matrix * c);
98 void mult_su3_na (su3_matrix * a, su3_matrix * b, su3_matrix * c);
99 void mult_su3_an (su3_matrix * a, su3_matrix * b, su3_matrix * c);
100 double realtrace_su3 (su3_matrix * a, su3_matrix * b);
101 complex trace_su3 (su3_matrix * a);
102 complex complextrace_su3 (su3_matrix * a, su3_matrix * b);
103 complex det_su3 (su3_matrix * a);
104 void add_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
105 void sub_su3_matrix (su3_matrix * a, su3_matrix * b, su3_matrix * c);
106 void scalar_mult_su3_matrix (su3_matrix * src, double scalar,
107 su3_matrix * dest);
108 void scalar_mult_add_su3_matrix (su3_matrix * src1, su3_matrix * src2,
109 double scalar, su3_matrix * dest);
110 void scalar_mult_sub_su3_matrix (su3_matrix * src1, su3_matrix * src2,
111 double scalar, su3_matrix * dest);
112 void c_scalar_mult_su3mat (su3_matrix * src, complex * scalar,
113 su3_matrix * dest);
114 void c_scalar_mult_add_su3mat (su3_matrix * src1, su3_matrix * src2,
115 complex * scalar, su3_matrix * dest);
116 void c_scalar_mult_sub_su3mat (su3_matrix * src1, su3_matrix * src2,
117 complex * scalar, su3_matrix * dest);
118 void su3_adjoint (su3_matrix * a, su3_matrix * b);
119 void make_anti_hermitian (su3_matrix * m3, anti_hermitmat * ah3);
120 void random_anti_hermitian (anti_hermitmat * mat_antihermit,
121 double_prn * prn_pt);
122 void uncompress_anti_hermitian (anti_hermitmat * mat_anti, su3_matrix * mat);
123 void compress_anti_hermitian (su3_matrix * mat, anti_hermitmat * mat_anti);
124 void clear_su3mat (su3_matrix * dest);
125 void su3mat_copy (su3_matrix * a, su3_matrix * b);
126 void dumpmat (su3_matrix * m);
127
128 void su3_projector (su3_vector * a, su3_vector * b, su3_matrix * c);
129 complex su3_dot (su3_vector * a, su3_vector * b);
130 double su3_rdot (su3_vector * a, su3_vector * b);
131 double magsq_su3vec (su3_vector * a);
132 void su3vec_copy (su3_vector * a, su3_vector * b);
133 void dumpvec (su3_vector * v);
134 void clearvec (su3_vector * v);
135
136 void mult_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
137 void mult_su3_mat_vec_sum (su3_matrix * a, su3_vector * b, su3_vector * c);
138 void mult_su3_mat_vec_sum_4dir (su3_matrix * a, su3_vector * b0,
139 su3_vector * b1, su3_vector * b2,
140 su3_vector * b3, su3_vector * c);
141 void mult_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b, su3_vector * c);
142 void mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c);
143 void mult_adj_su3_mat_vec_4dir (su3_matrix * a, su3_vector * b,
144 su3_vector * c);
145 void mult_adj_su3_mat_4vec (su3_matrix * mat, su3_vector * src,
146 su3_vector * dest0, su3_vector * dest1,
147 su3_vector * dest2, su3_vector * dest3);
148 void mult_adj_su3_mat_vec_sum (su3_matrix * a, su3_vector * b,
149 su3_vector * c);
150 void mult_adj_su3_mat_vec_nsum (su3_matrix * a, su3_vector * b,
151 su3_vector * c);
152
153 void add_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
154 void sub_su3_vector (su3_vector * a, su3_vector * b, su3_vector * c);
155 void sub_four_su3_vecs (su3_vector * a, su3_vector * b1, su3_vector * b2,
156 su3_vector * b3, su3_vector * b4);
157
158 void scalar_mult_su3_vector (su3_vector * src, double scalar,
159 su3_vector * dest);
160 void scalar_mult_add_su3_vector (su3_vector * src1, su3_vector * src2,
161 double scalar, su3_vector * dest);
162 void scalar_mult_sum_su3_vector (su3_vector * src1, su3_vector * src2,
163 double scalar);
164 void scalar_mult_sub_su3_vector (su3_vector * src1, su3_vector * src2,
165 double scalar, su3_vector * dest);
166 void scalar_mult_wvec (wilson_vector * src, double s, wilson_vector * dest);
167 void scalar_mult_hwvec (half_wilson_vector * src, double s,
168 half_wilson_vector * dest);
169 void scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
170 double scalar, wilson_vector * dest);
171 void scalar_mult_addtm_wvec (wilson_vector * src1, wilson_vector * src2,
172 double scalar, wilson_vector * dest);
173 void c_scalar_mult_wvec (wilson_vector * src1, complex * phase,
174 wilson_vector * dest);
175 void c_scalar_mult_add_wvec (wilson_vector * src1, wilson_vector * src2,
176 complex * phase, wilson_vector * dest);
177 void c_scalar_mult_add_wvec2 (wilson_vector * src1, wilson_vector * src2,
178 complex s, wilson_vector * dest);
179 void c_scalar_mult_su3vec (su3_vector * src, complex * phase,
180 su3_vector * dest);
181 void c_scalar_mult_add_su3vec (su3_vector * v1, complex * phase,
182 su3_vector * v2);
183 void c_scalar_mult_sub_su3vec (su3_vector * v1, complex * phase,
184 su3_vector * v2);
185
186 void left_su2_hit_n (su2_matrix * u, int p, int q, su3_matrix * link);
187 void right_su2_hit_a (su2_matrix * u, int p, int q, su3_matrix * link);
188 void dumpsu2 (su2_matrix * u);
189 void mult_su2_mat_vec_elem_n (su2_matrix * u, complex * x0, complex * x1);
190 void mult_su2_mat_vec_elem_a (su2_matrix * u, complex * x0, complex * x1);
191
192 void mult_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
193 wilson_vector * dest);
194 void mult_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
195 half_wilson_vector * dest);
196 void mult_adj_mat_wilson_vec (su3_matrix * mat, wilson_vector * src,
197 wilson_vector * dest);
198 void mult_adj_su3_mat_hwvec (su3_matrix * mat, half_wilson_vector * src,
199 half_wilson_vector * dest);
200
201 void add_wilson_vector (wilson_vector * src1, wilson_vector * src2,
202 wilson_vector * dest);
203 void sub_wilson_vector (wilson_vector * src1, wilson_vector * src2,
204 wilson_vector * dest);
205 double magsq_wvec (wilson_vector * src);
206 complex wvec_dot (wilson_vector * src1, wilson_vector * src2);
207 complex wvec2_dot (wilson_vector * src1, wilson_vector * src2);
208 double wvec_rdot (wilson_vector * a, wilson_vector * b);
209
210 void wp_shrink (wilson_vector * src, half_wilson_vector * dest,
211 int dir, int sign);
212 void wp_shrink_4dir (wilson_vector * a, half_wilson_vector * b1,
213 half_wilson_vector * b2, half_wilson_vector * b3,
214 half_wilson_vector * b4, int sign);
215 void wp_grow (half_wilson_vector * src, wilson_vector * dest,
216 int dir, int sign);
217 void wp_grow_add (half_wilson_vector * src, wilson_vector * dest,
218 int dir, int sign);
219 void grow_add_four_wvecs (wilson_vector * a, half_wilson_vector * b1,
220 half_wilson_vector * b2, half_wilson_vector * b3,
221 half_wilson_vector * b4, int sign, int sum);
222 void mult_by_gamma (wilson_vector * src, wilson_vector * dest, int dir);
223 void mult_by_gamma_left (wilson_matrix * src, wilson_matrix * dest, int dir);
224 void mult_by_gamma_right (wilson_matrix * src, wilson_matrix * dest, int dir);
225 void mult_swv_by_gamma_l (spin_wilson_vector * src, spin_wilson_vector * dest,
226 int dir);
227 void mult_swv_by_gamma_r (spin_wilson_vector * src, spin_wilson_vector * dest,
228 int dir);
229 void su3_projector_w (wilson_vector * a, wilson_vector * b, su3_matrix * c);
230 void clear_wvec (wilson_vector * dest);
231 void copy_wvec (wilson_vector * src, wilson_vector * dest);
232 void dump_wilson_vec (wilson_vector * src);
233
234 double gaussian_rand_no (double_prn * prn_pt);
/* Aliases for `int' and `unsigned int'.  The names imply 32-bit width,
   which holds only where `int' is 32 bits wide.  */
typedef int int32type;
typedef unsigned int u_int32type;

/* Declaration only; takes an array of N int32type words.  */
void byterevn (int32type w[], int n);
238
239 void
mult_adj_su3_mat_vec(su3_matrix * a,su3_vector * b,su3_vector * c)240 mult_adj_su3_mat_vec (su3_matrix * a, su3_vector * b, su3_vector * c)
241 {
242 int i;
243 register double t, ar, ai, br, bi, cr, ci;
244 for (i = 0; i < 3; i++)
245 {
246 ar = a->e[0][i].real;
247 ai = a->e[0][i].imag;
248
249 br = b->c[0].real;
250 bi = b->c[0].imag;
251
252 cr = ar * br;
253 t = ai * bi;
254 cr += t;
255
256 ci = ar * bi;
257 t = ai * br;
258 ci -= t;
259
260 ar = a->e[1][i].real;
261 ai = a->e[1][i].imag;
262
263 br = b->c[1].real;
264 bi = b->c[1].imag;
265
266 t = ar * br;
267 cr += t;
268 t = ai * bi;
269 cr += t;
270
271 t = ar * bi;
272 ci += t;
273 t = ai * br;
274 ci -= t;
275
276 ar = a->e[2][i].real;
277 ai = a->e[2][i].imag;
278
279 br = b->c[2].real;
280 bi = b->c[2].imag;
281
282 t = ar * br;
283 cr += t;
284 t = ai * bi;
285 cr += t;
286
287 t = ar * bi;
288 ci += t;
289 t = ai * br;
290 ci -= t;
291
292 c->c[i].real = cr;
293 c->c[i].imag = ci;
294 }
295 }
296
297 /* { dg-final { scan-assembler-not "mtvsrd" } } */
298