1 
2 #if !defined(__hppa__) || !defined(__hpux__)
3 #include <complex.h>
4 #endif
5 
6 /* Double float has 53 bits of fraction. */
7 #define FRAC (1.0 / (1LL << 48))
8 typedef double _Complex Type;
9 
close_enough(Type a,Type b)10 int close_enough (Type a, Type b)
11 {
12   Type diff = a - b;
13   double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
14   double mag2_diff = (__real__(diff) * __real__ (diff)
15 		     + __imag__ (diff) * __imag__ (diff));
16 
17   return mag2_diff / mag2_a < (FRAC * FRAC);
18 }
19 
20 #define N 100
21 
22 static int __attribute__ ((noinline))
vector(Type ary[N],Type sum,Type prod)23 vector (Type ary[N], Type sum, Type prod)
24 {
25   Type tsum = 0, tprod = 1;
26 
27 #pragma acc parallel vector_length(32) copyin(ary[0:N])
28   {
29 #pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
30     for (int ix = 0; ix < N; ix++)
31       {
32 	tsum += ary[ix];
33 	tprod *= ary[ix];
34       }
35   }
36 
37   if (!close_enough (sum, tsum))
38     return 1;
39 
40   if (!close_enough (prod, tprod))
41     return 1;
42 
43   return 0;
44 }
45 
46 static int __attribute__ ((noinline))
worker(Type ary[N],Type sum,Type prod)47 worker (Type ary[N], Type sum, Type prod)
48 {
49   Type tsum = 0, tprod = 1;
50 
51 #pragma acc parallel num_workers(32) copyin(ary[0:N])
52   {
53 #pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
54     for (int ix = 0; ix < N; ix++)
55       {
56 	tsum += ary[ix];
57 	tprod *= ary[ix];
58       }
59   }
60 
61   if (!close_enough (sum, tsum))
62     return 1;
63 
64   if (!close_enough (prod, tprod))
65     return 1;
66 
67   return 0;
68 }
69 
70 static int __attribute__ ((noinline))
gang(Type ary[N],Type sum,Type prod)71 gang (Type ary[N], Type sum, Type prod)
72 {
73   Type tsum = 0, tprod = 1;
74 
75 #pragma acc parallel num_gangs (32) copyin(ary[0:N])
76   {
77 #pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
78     for (int ix = 0; ix < N; ix++)
79       {
80 	tsum += ary[ix];
81 	tprod *= ary[ix];
82       }
83   }
84 
85   if (!close_enough (sum, tsum))
86     return 1;
87 
88   if (!close_enough (prod, tprod))
89     return 1;
90 
91   return 0;
92 }
93 
main(void)94 int main (void)
95 {
96   Type ary[N], sum = 0, prod = 1;
97 
98   for (int ix = 0; ix < N;  ix++)
99     {
100       double frac = ix * (1.0 / 1024) + 1.0;
101 
102       ary[ix] = frac + frac * 2.0j - 1.0j;
103       sum += ary[ix];
104       prod *= ary[ix];
105     }
106 
107   if (vector (ary, sum, prod))
108     return 1;
109 
110   if (worker (ary, sum, prod))
111     return 1;
112 
113   if (gang (ary, sum, prod))
114     return 1;
115 
116   return 0;
117 }
118