1 /* AMD GCN does not use 32-lane vectors, so the expected use counts mismatch.
2 { dg-skip-if "unsuitable dimensions" { openacc_radeon_accel_selected } { "*" } { "" } } */
3
4 /* { dg-additional-options "-fopenacc-dim=32" } */
5
6 #include <stdio.h>
7 #include <openacc.h>
8 #include <gomp-constants.h>
9
check(const int * ary,int size,int gp,int wp,int vp)10 static int check (const int *ary, int size, int gp, int wp, int vp)
11 {
12 int exit = 0;
13 int ix;
14 int gangs[32], workers[32], vectors[32];
15
16 for (ix = 0; ix < 32; ix++)
17 gangs[ix] = workers[ix] = vectors[ix] = 0;
18
19 for (ix = 0; ix < size; ix++)
20 {
21 vectors[ary[ix] & 0xff]++;
22 workers[(ary[ix] >> 8) & 0xff]++;
23 gangs[(ary[ix] >> 16) & 0xff]++;
24 }
25
26 for (ix = 0; ix < 32; ix++)
27 {
28 if (gp)
29 {
30 int expect = gangs[0];
31 if (gangs[ix] != expect)
32 {
33 exit = 1;
34 printf ("gang %d not used %d times\n", ix, expect);
35 }
36 }
37 else if (ix && gangs[ix])
38 {
39 exit = 1;
40 printf ("gang %d unexpectedly used\n", ix);
41 }
42
43 if (wp)
44 {
45 int expect = workers[0];
46 if (workers[ix] != expect)
47 {
48 exit = 1;
49 printf ("worker %d not used %d times\n", ix, expect);
50 }
51 }
52 else if (ix && workers[ix])
53 {
54 exit = 1;
55 printf ("worker %d unexpectedly used\n", ix);
56 }
57
58 if (vp)
59 {
60 int expect = vectors[0];
61 if (vectors[ix] != expect)
62 {
63 exit = 1;
64 printf ("vector %d not used %d times\n", ix, expect);
65 }
66 }
67 else if (ix && vectors[ix])
68 {
69 exit = 1;
70 printf ("vector %d unexpectedly used\n", ix);
71 }
72
73 }
74 return exit;
75 }
76
77 #pragma acc routine seq
place()78 static int __attribute__((noinline)) place ()
79 {
80 int r = 0;
81
82 if (acc_on_device (acc_device_not_host))
83 {
84 int g, w, v;
85
86 g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
87 w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
88 v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
89 r = (g << 16) | (w << 8) | v;
90 }
91 return r;
92 }
93
clear(int * ary,int size)94 static void clear (int *ary, int size)
95 {
96 int ix;
97
98 for (ix = 0; ix < size; ix++)
99 ary[ix] = -1;
100 }
101
gang_vector_1(int * ary,int size)102 int gang_vector_1 (int *ary, int size)
103 {
104 clear (ary, size);
105 #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
106 {
107 #pragma acc loop tile(128) gang vector
108 for (int jx = 0; jx < size; jx++)
109 ary[jx] = place ();
110 }
111
112 return check (ary, size, 1, 0, 1);
113 }
114
gang_vector_2a(int * ary,int size)115 int gang_vector_2a (int *ary, int size)
116 {
117 if (size % 256)
118 return 1;
119
120 clear (ary, size);
121 #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
122 {
123 #pragma acc loop tile(64, 64) gang vector
124 for (int jx = 0; jx < size / 256; jx++)
125 for (int ix = 0; ix < 256; ix++)
126 ary[jx * 256 + ix] = place ();
127 }
128
129 return check (ary, size, 1, 0, 1);
130 }
131
gang_vector_2b(int * ary,int size)132 int gang_vector_2b (int *ary, int size)
133 {
134 if (size % 256)
135 return 1;
136
137 clear (ary, size);
138 #pragma acc parallel vector_length(32) num_gangs (32) copy (ary[0:size]) firstprivate (size)
139 {
140 #pragma acc loop tile(64, 64) gang vector
141 for (int jx = 0; jx < size; jx += 256)
142 for (int ix = 0; ix < 256; ix++)
143 ary[jx + ix] = place ();
144 }
145
146 return check (ary, size, 1, 0, 1);
147 }
148
worker_vector_2a(int * ary,int size)149 int worker_vector_2a (int *ary, int size)
150 {
151 if (size % 256)
152 return 1;
153
154 clear (ary, size);
155 #pragma acc parallel vector_length(32) num_workers (32) copy (ary[0:size]) firstprivate (size)
156 {
157 #pragma acc loop tile(64, 64) worker vector
158 for (int jx = 0; jx < size / 256; jx++)
159 for (int ix = 0; ix < 256; ix++)
160 ary[jx * 256 + ix] = place ();
161 }
162
163 return check (ary, size, 0, 1, 1);
164 }
165
worker_vector_2b(int * ary,int size)166 int worker_vector_2b (int *ary, int size)
167 {
168 if (size % 256)
169 return 1;
170
171 clear (ary, size);
172 #pragma acc parallel vector_length(32) num_workers (32) copy (ary[0:size]) firstprivate (size)
173 {
174 #pragma acc loop tile(64, 64) worker vector
175 for (int jx = 0; jx < size; jx += 256)
176 for (int ix = 0; ix < 256; ix++)
177 ary[jx + ix] = place ();
178 }
179
180 return check (ary, size, 0, 1, 1);
181 }
182
gang_worker_vector_2a(int * ary,int size)183 int gang_worker_vector_2a (int *ary, int size)
184 {
185 if (size % 256)
186 return 1;
187 clear (ary, size);
188 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
189 {
190 #pragma acc loop tile(32, 32)
191 for (int jx = 0; jx < size / 256; jx++)
192 for (int ix = 0; ix < 256; ix++)
193 ary[jx * 256 + ix] = place ();
194 }
195
196 return check (ary, size, 1, 1, 1);
197 }
198
gang_worker_vector_2b(int * ary,int size)199 int gang_worker_vector_2b (int *ary, int size)
200 {
201 if (size % 256)
202 return 1;
203 clear (ary, size);
204 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
205 {
206 #pragma acc loop tile(32, 32)
207 for (int jx = 0; jx < size; jx += 256)
208 for (int ix = 0; ix < 256; ix++)
209 ary[jx + ix] = place ();
210 }
211
212 return check (ary, size, 1, 1, 1);
213 }
214
gang_worker_vector_star_2a(int * ary,int size)215 int gang_worker_vector_star_2a (int *ary, int size)
216 {
217 if (size % 256)
218 return 1;
219
220 clear (ary, size);
221 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
222 {
223 #pragma acc loop tile(*, *)
224 for (int jx = 0; jx < size / 256; jx++)
225 for (int ix = 0; ix < 256; ix++)
226 ary[jx * 256 + ix] = place ();
227 }
228
229 return check (ary, size, 1, 1, 1);
230 }
231
gang_worker_vector_star_2b(int * ary,int size)232 int gang_worker_vector_star_2b (int *ary, int size)
233 {
234 if (size % 256)
235 return 1;
236
237 clear (ary, size);
238 #pragma acc parallel vector_length(32) num_workers (32) num_gangs(32) copy (ary[0:size]) firstprivate (size)
239 {
240 #pragma acc loop tile(*, *)
241 for (int jx = 0; jx < size; jx +=256)
242 for (int ix = 0; ix < 256; ix++)
243 ary[jx + ix] = place ();
244 }
245
246 return check (ary, size, 1, 1, 1);
247 }
248
249 #define N (32*32*32*8)
main()250 int main ()
251 {
252 int ondev = 0;
253
254 #pragma acc parallel copy(ondev)
255 {
256 ondev = acc_on_device (acc_device_not_host);
257 }
258 if (!ondev)
259 return 0;
260
261 int ary[N];
262 if (gang_vector_1 (ary, N))
263 return 1;
264 if (gang_vector_2a (ary, N))
265 return 1;
266 if (worker_vector_2a (ary, N))
267 return 1;
268 if (gang_worker_vector_2a (ary, N))
269 return 1;
270 if (gang_worker_vector_star_2a (ary, N))
271 return 1;
272 if (gang_vector_2b (ary, N))
273 return 1;
274 if (worker_vector_2b (ary, N))
275 return 1;
276 if (gang_worker_vector_2b (ary, N))
277 return 1;
278 if (gang_worker_vector_star_2b (ary, N))
279 return 1;
280 return 0;
281 }
282