1 #include <openacc.h>
2 #include <string.h>
3 #include <stdio.h>
4 #include <gomp-constants.h>
5
6 #pragma acc routine seq
7 static int __attribute__ ((noinline))
coord(void)8 coord (void)
9 {
10 int res = 0;
11
12 if (acc_on_device (acc_device_nvidia))
13 {
14 int g = 0, w = 0, v = 0;
15 g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
16 w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
17 v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
18
19 res = (1 << 24) | (g << 16) | (w << 8) | v;
20 }
21
22 return res;
23 }
24
/* Validate the coordinate words stored in ARY[0 .. SIZE-1].

   Each word is decoded as: bits 16-23 gang id, bits 8-15 worker id,
   bits 0-7 vector id; the value of (word >> 24) is accumulated as the
   "offloaded" count.  GP, WP and VP are the number of gangs, workers and
   vector lanes the caller expects to have been used.

   Returns 0 when no element carries the offload marker (nothing to
   verify), or when every element was offloaded, every id is in range and
   each gang/worker/vector index was used the same number of times as
   index 0 of its dimension.  Returns 1 otherwise, after printing a
   diagnostic for each problem found.

   NOTE(review): GP, WP and VP are used directly as allocation sizes, so
   callers are expected to pass non-negative values.  */
static int
check (const int *ary, int size, int gp, int wp, int vp)
{
  int exit = 0;
  int ix;
  /* Per-dimension usage histograms, one counter per expected index.  */
  int *gangs = (int *)__builtin_alloca (gp * sizeof (int));
  int *workers = (int *)__builtin_alloca (wp * sizeof (int));
  int *vectors = (int *)__builtin_alloca (vp * sizeof (int));
  int offloaded = 0;

  memset (gangs, 0, gp * sizeof (int));
  memset (workers, 0, wp * sizeof (int));
  memset (vectors, 0, vp * sizeof (int));

  for (ix = 0; ix < size; ix++)
    {
      int g = (ary[ix] >> 16) & 0xff;
      int w = (ary[ix] >> 8) & 0xff;
      int v = (ary[ix] >> 0) & 0xff;

      if (g >= gp || w >= wp || v >= vp)
	{
	  /* %#x takes an unsigned int, so convert explicitly.  */
	  printf ("unexpected cpu %#x used\n", (unsigned int) ary[ix]);
	  exit = 1;
	}
      else
	{
	  vectors[v]++;
	  workers[w]++;
	  gangs[g]++;
	}
      offloaded += ary[ix] >> 24;
    }

  /* No element carries the offload marker: nothing to verify.  */
  if (!offloaded)
    return 0;

  if (offloaded != size)
    {
      printf ("offloaded %d times, expected %d\n", offloaded, size);
      return 1;
    }

  /* Every index of a dimension must have been used as often as index 0.  */
  for (ix = 0; ix < gp; ix++)
    if (gangs[ix] != gangs[0])
      {
	printf ("gang %d not used %d times\n", ix, gangs[0]);
	exit = 1;
      }

  for (ix = 0; ix < wp; ix++)
    if (workers[ix] != workers[0])
      {
	printf ("worker %d not used %d times\n", ix, workers[0]);
	exit = 1;
      }

  for (ix = 0; ix < vp; ix++)
    if (vectors[ix] != vectors[0])
      {
	printf ("vector %d not used %d times\n", ix, vectors[0]);
	exit = 1;
      }

  return exit;
}
91
/* Number of loop iterations run by each partitioning test, and the
   file-scope result buffer that receives one coordinate word per
   iteration.  */
#define N (32 * 32 * 32)
int ary[N];
94
95 static int
check_gang(int gp)96 check_gang (int gp)
97 {
98 #pragma acc parallel copyout (ary)
99 {
100 #pragma acc loop gang (static:1)
101 for (int ix = 0; ix < N; ix++)
102 ary[ix] = coord ();
103 }
104
105 return check (ary, N, gp, 1, 1);
106 }
107
108 static int
check_worker(int wp)109 check_worker (int wp)
110 {
111 #pragma acc parallel copyout (ary)
112 {
113 #pragma acc loop worker
114 for (int ix = 0; ix < N; ix++)
115 ary[ix] = coord ();
116 }
117
118 return check (ary, N, 1, wp, 1);
119 }
120
121 static int
check_vector(int vp)122 check_vector (int vp)
123 {
124 #pragma acc parallel copyout (ary)
125 {
126 #pragma acc loop vector
127 for (int ix = 0; ix < N; ix++)
128 ary[ix] = coord ();
129 }
130
131 return check (ary, N, 1, 1, vp);
132 }
133
/* Run the gang, worker and vector partitioning checks with the expected
   dimension sizes GP, WP and VP.  All three checks always run; the
   results are OR-ed together, so the return value is 0 only if every
   check succeeded.  */
static int
test_1 (int gp, int wp, int vp)
{
  int exit = 0;

  exit |= check_gang (gp);
  exit |= check_worker (wp);
  exit |= check_vector (vp);

  return exit;
}
145