1 /* { dg-do run { target openacc_nvidia_accel_selected } } */
2 /* { dg-additional-options "-lcuda" } */
3
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <unistd.h>
7 #include <openacc.h>
8 #include <cuda.h>
9
10 int
main(int argc,char ** argv)11 main (int argc, char **argv)
12 {
13 CUdevice dev;
14 CUfunction delay2;
15 CUmodule module;
16 CUresult r;
17 int N;
18 int i;
19 CUstream *streams;
20 unsigned long **a, **d_a, *tid, ticks;
21 int nbytes;
22 void *kargs[3];
23 int clkrate;
24 int devnum, nprocs;
25
26 acc_init (acc_device_nvidia);
27
28 devnum = acc_get_device_num (acc_device_nvidia);
29
30 r = cuDeviceGet (&dev, devnum);
31 if (r != CUDA_SUCCESS)
32 {
33 fprintf (stderr, "cuDeviceGet failed: %d\n", r);
34 abort ();
35 }
36
37 r =
38 cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
39 dev);
40 if (r != CUDA_SUCCESS)
41 {
42 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
43 abort ();
44 }
45
46 r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
47 if (r != CUDA_SUCCESS)
48 {
49 fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
50 abort ();
51 }
52
53 r = cuModuleLoad (&module, "subr.ptx");
54 if (r != CUDA_SUCCESS)
55 {
56 fprintf (stderr, "cuModuleLoad failed: %d\n", r);
57 abort ();
58 }
59
60 r = cuModuleGetFunction (&delay2, module, "delay2");
61 if (r != CUDA_SUCCESS)
62 {
63 fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
64 abort ();
65 }
66
67 nbytes = sizeof (int);
68
69 ticks = (unsigned long) (200.0 * clkrate);
70
71 N = nprocs;
72
73 streams = (CUstream *) malloc (N * sizeof (void *));
74
75 a = (unsigned long **) malloc (N * sizeof (unsigned long *));
76 d_a = (unsigned long **) malloc (N * sizeof (unsigned long *));
77 tid = (unsigned long *) malloc (N * sizeof (unsigned long));
78
79 for (i = 0; i < N; i++)
80 {
81 a[i] = (unsigned long *) malloc (sizeof (unsigned long));
82 *a[i] = N;
83 d_a[i] = (unsigned long *) acc_malloc (nbytes);
84 tid[i] = i;
85
86 acc_map_data (a[i], d_a[i], nbytes);
87
88 streams[i] = (CUstream) acc_get_cuda_stream (i);
89 if (streams[i] != NULL)
90 abort ();
91
92 r = cuStreamCreate (&streams[i], CU_STREAM_DEFAULT);
93 if (r != CUDA_SUCCESS)
94 {
95 fprintf (stderr, "cuStreamCreate failed: %d\n", r);
96 abort ();
97 }
98
99 if (!acc_set_cuda_stream (i, streams[i]))
100 abort ();
101 }
102
103 for (i = 0; i < N; i++)
104 {
105 kargs[0] = (void *) &d_a[i];
106 kargs[1] = (void *) &ticks;
107 kargs[2] = (void *) &tid[i];
108
109 r = cuLaunchKernel (delay2, 1, 1, 1, 1, 1, 1, 0, streams[i], kargs, 0);
110 if (r != CUDA_SUCCESS)
111 {
112 fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
113 abort ();
114 }
115
116 ticks = (unsigned long) (50.0 * clkrate);
117 }
118
119 acc_wait_all_async (0);
120
121 for (i = 0; i < N; i++)
122 {
123 acc_copyout (a[i], nbytes);
124 if (*a[i] != i)
125 abort ();
126 }
127
128 free (streams);
129
130 for (i = 0; i < N; i++)
131 {
132 free (a[i]);
133 }
134
135 free (a);
136 free (d_a);
137 free (tid);
138
139 acc_shutdown (acc_device_nvidia);
140
141 exit (0);
142 }
143
144 /* { dg-output "" } */
145