1 #include <openacc.h>
2 #include <alloca.h>
3 #include <string.h>
4 #include <stdio.h>
5 #include <gomp-constants.h>
6
7 #pragma acc routine seq
8 static int __attribute__ ((noinline))
coord(void)9 coord (void)
10 {
11 int res = 0;
12
13 if (acc_on_device (acc_device_nvidia))
14 {
15 int g = 0, w = 0, v = 0;
16 g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
17 w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
18 v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
19
20 res = (1 << 24) | (g << 16) | (w << 8) | v;
21 }
22
23 return res;
24 }
25
/* Report every index in COUNTS[0..N) whose tally differs from COUNTS[0].
   WHAT names the parallelism level used in the diagnostic.  Returns 1 if
   at least one mismatch was reported, 0 otherwise.  */
static int
check_uniform (const char *what, const int *counts, int n)
{
  int failed = 0;

  for (int ix = 0; ix < n; ix++)
    if (counts[ix] != counts[0])
      {
        printf ("%s %d not used %d times\n", what, ix, counts[0]);
        failed = 1;
      }

  return failed;
}

/* Validate the coordinates recorded in ARY[0..SIZE).

   Each element is decoded as: bits 24 and up = offload marker, bits
   16-23 = gang id, bits 8-15 = worker id, bits 0-7 = vector id.  GP, WP
   and VP are the number of gangs, workers and vector lanes expected.

   Returns 0 on success and 1 on failure.  Failure means any of:
     - an element names a gang/worker/vector id outside the expected range;
     - only some of the elements carry the offload marker;
     - the ids within a level were not each used the same number of times.

   If no element carries the offload marker, the usage-balance checks are
   skipped, but an out-of-range id is still reported as a failure.  */
static int
check (const int *ary, int size, int gp, int wp, int vp)
{
  int failed = 0;
  int offloaded = 0;

  /* Per-id usage tallies.  The partition counts are small, so they live
     on the stack.  */
  int *gangs = (int *) alloca (gp * sizeof (int));
  int *workers = (int *) alloca (wp * sizeof (int));
  int *vectors = (int *) alloca (vp * sizeof (int));

  memset (gangs, 0, gp * sizeof (int));
  memset (workers, 0, wp * sizeof (int));
  memset (vectors, 0, vp * sizeof (int));

  for (int ix = 0; ix < size; ix++)
    {
      int g = (ary[ix] >> 16) & 0xff;
      int w = (ary[ix] >> 8) & 0xff;
      int v = (ary[ix] >> 0) & 0xff;

      if (g >= gp || w >= wp || v >= vp)
        {
          printf ("unexpected cpu %#x used\n", ary[ix]);
          failed = 1;
        }
      else
        {
          vectors[v]++;
          workers[w]++;
          gangs[g]++;
        }

      /* Sum the offload markers.  */
      offloaded += ary[ix] >> 24;
    }

  /* Nothing was offloaded, so there is no distribution to verify.  Still
     propagate any out-of-range diagnostic instead of reporting success
     after printing an error.  */
  if (!offloaded)
    return failed;

  if (offloaded != size)
    {
      printf ("offloaded %d times, expected %d\n", offloaded, size);
      return 1;
    }

  /* Every id within a level must have been used equally often.  */
  failed |= check_uniform ("gang", gangs, gp);
  failed |= check_uniform ("worker", workers, wp);
  failed |= check_uniform ("vector", vectors, vp);

  return failed;
}
92
/* Number of loop iterations, and therefore recorded coordinates, per test.  */
#define N (32 * 32 * 32)
/* Result buffer: each parallel loop stores one coord () value per element
   and copies the array out for checking.  */
int ary[N];
95
96 static int
check_gang(int gp)97 check_gang (int gp)
98 {
99 #pragma acc parallel copyout (ary)
100 {
101 #pragma acc loop gang (static:1)
102 for (int ix = 0; ix < N; ix++)
103 ary[ix] = coord ();
104 }
105
106 return check (ary, N, gp, 1, 1);
107 }
108
109 static int
check_worker(int wp)110 check_worker (int wp)
111 {
112 #pragma acc parallel copyout (ary)
113 {
114 #pragma acc loop worker
115 for (int ix = 0; ix < N; ix++)
116 ary[ix] = coord ();
117 }
118
119 return check (ary, N, 1, wp, 1);
120 }
121
122 static int
check_vector(int vp)123 check_vector (int vp)
124 {
125 #pragma acc parallel copyout (ary)
126 {
127 #pragma acc loop vector
128 for (int ix = 0; ix < N; ix++)
129 ary[ix] = coord ();
130 }
131
132 return check (ary, N, 1, 1, vp);
133 }
134
/* Run the gang, worker and vector checks in that order, expecting GP
   gangs, WP workers and VP vector lanes respectively.  All three checks
   always run; the result is the bitwise OR of their return values, so it
   is nonzero if any of them failed.  */
static int
test_1 (int gp, int wp, int vp)
{
  const int gang_status = check_gang (gp);
  const int worker_status = check_worker (wp);
  const int vector_status = check_vector (vp);

  return gang_status | worker_status | vector_status;
}
146