1 /* { dg-additional-options "-fopenacc-dim=16:16" } */
2
3 #include <openacc.h>
4 #include <string.h>
5 #include <stdio.h>
6 #include <gomp-constants.h>
7
8 #pragma acc routine
coord()9 static int __attribute__ ((noinline)) coord ()
10 {
11 int res = 0;
12
13 if (acc_on_device (acc_device_not_host))
14 {
15 int g, w, v;
16
17 g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
18 w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
19 v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
20 res = (1 << 24) | (g << 16) | (w << 8) | v;
21 }
22 return res;
23 }
24
25
/* Validate the SIZE encoded coordinates in ARY against the expected
   partitioning of GP gangs, WP workers and VP vectors.

   Each element is decoded as: bits 16-23 gang id, bits 8-15 worker id,
   bits 0-7 vector id, and the value shifted right by 24 as an "offloaded"
   count.  An element whose decoded id is outside [0, GP), [0, WP) or
   [0, VP) is reported on stdout and counted as a failure.

   Returns 0 if the offloaded counts sum to zero (regardless of any range
   complaints already printed), 1 if that sum differs from SIZE, and
   otherwise 1 if any range complaint was printed or if any gang, worker
   or vector id was seen a different number of times than id 0 of the
   same kind; 0 when everything is consistent.  */
int check (const int *ary, int size, int gp, int wp, int vp)
{
  int failed = 0;
  int n_offloaded = 0;

  /* Per-id usage counters, zeroed before counting.  */
  int *gang_hits = (int *) __builtin_alloca (gp * sizeof (int));
  int *worker_hits = (int *) __builtin_alloca (wp * sizeof (int));
  int *vector_hits = (int *) __builtin_alloca (vp * sizeof (int));

  memset (gang_hits, 0, gp * sizeof (int));
  memset (worker_hits, 0, wp * sizeof (int));
  memset (vector_hits, 0, vp * sizeof (int));

  for (int i = 0; i < size; i++)
    {
      const int word = ary[i];
      const int g = (word >> 16) & 0xff;
      const int w = (word >> 8) & 0xff;
      const int v = (word >> 0) & 0xff;

      n_offloaded += word >> 24;

      if (g < gp && w < wp && v < vp)
	{
	  gang_hits[g]++;
	  worker_hits[w]++;
	  vector_hits[v]++;
	}
      else
	{
	  printf ("unexpected cpu %#x used\n", word);
	  failed = 1;
	}
    }

  /* Nothing carried the offloaded marker: there is no distribution to
     check.  */
  if (n_offloaded == 0)
    return 0;

  if (n_offloaded != size)
    {
      printf ("offloaded %d times, expected %d\n", n_offloaded, size);
      return 1;
    }

  /* Every id of a given kind must have been used as often as id 0.  */
  for (int i = 0; i < gp; i++)
    if (gang_hits[0] != gang_hits[i])
      {
	printf ("gang %d not used %d times\n", i, gang_hits[0]);
	failed = 1;
      }

  for (int i = 0; i < wp; i++)
    if (worker_hits[0] != worker_hits[i])
      {
	printf ("worker %d not used %d times\n", i, worker_hits[0]);
	failed = 1;
      }

  for (int i = 0; i < vp; i++)
    if (vector_hits[0] != vector_hits[i])
      {
	printf ("vector %d not used %d times\n", i, vector_hits[0]);
	failed = 1;
      }

  return failed;
}
91
92 #define N (32 *32*32)
93
test_1(int gp,int wp,int vp)94 int test_1 (int gp, int wp, int vp)
95 {
96 int ary[N];
97 int exit = 0;
98
99 #pragma acc parallel copyout (ary)
100 {
101 #pragma acc loop gang (static:1)
102 for (int ix = 0; ix < N; ix++)
103 ary[ix] = coord ();
104 }
105
106 exit |= check (ary, N, gp, 1, 1);
107
108 #pragma acc parallel copyout (ary)
109 {
110 #pragma acc loop worker
111 for (int ix = 0; ix < N; ix++)
112 ary[ix] = coord ();
113 }
114
115 exit |= check (ary, N, 1, wp, 1);
116
117 #pragma acc parallel copyout (ary)
118 {
119 #pragma acc loop vector
120 for (int ix = 0; ix < N; ix++)
121 ary[ix] = coord ();
122 }
123
124 exit |= check (ary, N, 1, 1, vp);
125
126 return exit;
127 }
128
/* Entry point: run test_1 with the gang, worker and vector counts expected
   for the target device, and use its result as the exit status.  */
int main ()
{
  int gangs = 16;
#ifdef ACC_DEVICE_TYPE_radeon
  /* AMD GCN does not currently support multiple workers.  This should be
     set to 16 when that changes.  */
  int workers = 1;
  /* AMD GCN uses the autovectorizer for the vector dimension: the use
     of a function call in vector-partitioned code in this test is not
     currently supported.  */
  int vectors = 1;
#else
  int workers = 16;
  int vectors = 32;
#endif

  return test_1 (gangs, workers, vectors);
}
142