1 #include <openacc.h>
2 #include <alloca.h>
3 #include <string.h>
4 #include <stdio.h>
5 #include <gomp-constants.h>
6 
7 #pragma acc routine seq
8 static int __attribute__ ((noinline))
coord(void)9 coord (void)
10 {
11   int res = 0;
12 
13   if (acc_on_device (acc_device_nvidia))
14     {
15       int g = 0, w = 0, v = 0;
16       g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
17       w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
18       v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
19 
20       res = (1 << 24) | (g << 16) | (w << 8) | v;
21     }
22 
23   return res;
24 }
25 
/* Validate SIZE coordinate words stored in ARY.

   Each word is decoded as: bits 24 and up = offload marker, bits 16-23 =
   gang id, bits 8-15 = worker id, bits 0-7 = vector id.  GP, WP and VP
   are the exclusive upper bounds for the gang, worker and vector ids.

   Returns 0 when nothing was offloaded (the sum of the markers is zero),
   or when every word was offloaded, every id is within bounds and every
   id below its bound was seen as often as id 0.  Returns 1 otherwise,
   after printing a diagnostic for each problem found.  */
static int
check (const int *ary, int size, int gp, int wp, int vp)
{
  int failed = 0;
  int offloaded = 0;
  int ix;

  /* Per-id usage counters, one array per dimension, zeroed before use.  */
  int *gangs = (int *) alloca (gp * sizeof (int));
  int *workers = (int *) alloca (wp * sizeof (int));
  int *vectors = (int *) alloca (vp * sizeof (int));

  memset (gangs, 0, gp * sizeof (int));
  memset (workers, 0, wp * sizeof (int));
  memset (vectors, 0, vp * sizeof (int));

  for (ix = 0; ix < size; ix++)
    {
      int g = (ary[ix] >> 16) & 0xff;
      int w = (ary[ix] >> 8) & 0xff;
      int v = (ary[ix] >> 0) & 0xff;

      if (g >= gp || w >= wp || v >= vp)
	{
	  /* Out-of-range ids are reported and not counted.  */
	  printf ("unexpected cpu %#x used\n", ary[ix]);
	  failed = 1;
	}
      else
	{
	  vectors[v]++;
	  workers[w]++;
	  gangs[g]++;
	}

      /* Accumulate the offload marker of every word, in range or not.  */
      offloaded += ary[ix] >> 24;
    }

  /* No marker seen at all: nothing ran on the device, so there is
     nothing to verify.  */
  if (!offloaded)
    return 0;

  if (offloaded != size)
    {
      printf ("offloaded %d times,  expected %d\n", offloaded, size);
      return 1;
    }

  /* Within each dimension every id must match the count of id 0.  */
  for (ix = 0; ix < gp; ix++)
    if (gangs[ix] != gangs[0])
      {
	printf ("gang %d not used %d times\n", ix, gangs[0]);
	failed = 1;
      }

  for (ix = 0; ix < wp; ix++)
    if (workers[ix] != workers[0])
      {
	printf ("worker %d not used %d times\n", ix, workers[0]);
	failed = 1;
      }

  for (ix = 0; ix < vp; ix++)
    if (vectors[ix] != vectors[0])
      {
	printf ("vector %d not used %d times\n", ix, vectors[0]);
	failed = 1;
      }

  return failed;
}
92 
/* Number of loop iterations, and the buffer receiving one coordinate
   word per iteration.  */
#define N (32 * 32 * 32)
int ary[N];
95 
96 static int
check_gang(int gp)97 check_gang (int gp)
98 {
99 #pragma acc parallel copyout (ary)
100   {
101 #pragma acc loop gang (static:1)
102     for (int ix = 0; ix < N; ix++)
103       ary[ix] = coord ();
104   }
105 
106   return check (ary, N, gp, 1, 1);
107 }
108 
109 static int
check_worker(int wp)110 check_worker (int wp)
111 {
112 #pragma  acc parallel copyout (ary)
113   {
114 #pragma acc loop worker
115     for (int ix = 0; ix < N; ix++)
116       ary[ix] = coord ();
117   }
118 
119   return check (ary, N, 1, wp, 1);
120 }
121 
122 static int
check_vector(int vp)123 check_vector (int vp)
124 {
125 #pragma  acc parallel copyout (ary)
126   {
127 #pragma acc loop vector
128     for (int ix = 0; ix < N; ix++)
129       ary[ix] = coord ();
130   }
131 
132   return check (ary, N, 1, 1, vp);
133 }
134 
/* Run the gang, worker and vector checks with bounds GP, WP and VP.

   All three checks are always executed; their results are OR'ed
   together, so the return value is nonzero if any of them failed.  */
static int
test_1 (int gp, int wp, int vp)
{
  int failed = 0;

  failed |= check_gang (gp);
  failed |= check_worker (wp);
  failed |= check_vector (vp);

  return failed;
}
146